Beispiel #1
0
    def __init__(self):
        """Run the initialisers of the fund data helpers on this instance."""
        # Each initialiser is invoked explicitly, in this fixed order,
        # rather than through a cooperative super() chain.
        initialisers = (FundHolder, FundFactor, FundPool, FundStatic,
                        FundExposure)
        for owner in initialisers:
            owner.__init__(self)
    def cal_weight_date(self, quarter_date):
        """Sum asset-scaled quarterly stock holdings over the equity fund pool.

        For every fund returned by the "普通股票型基金" pool at
        ``quarter_date`` the fund's holding column for that quarter is
        multiplied by the fund's total asset (divided by 1e8) and the scaled
        columns are added up per stock.

        NOTE(review): the original docstring stated that every fund carries a
        weight of 1, but the code scales each fund by its total asset.

        Args:
            quarter_date: Label of the quarter; used as the column key of
                both ``self.total_asset`` and the per-fund holding table.

        Returns:
            pandas.DataFrame with a single column holding the per-stock sum.
            The frame is empty when the pool contains no fund.
        """
        fund_pool = FundPool().get_fund_pool_code(name="普通股票型基金",
                                                  date=quarter_date)

        # Collect the per-fund columns and concatenate once; growing a frame
        # inside the loop copies all previous columns on every iteration.
        frames = []
        for fund in fund_pool:
            try:
                asset = self.total_asset.loc[fund, quarter_date] / 100000000
            except Exception:
                # No asset figure available for this fund/quarter: use the
                # same fallback scale the original code used.
                asset = 0.5
            try:
                fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
                fund_holding_date = pd.DataFrame(fund_holding[quarter_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date *= asset
                fund_holding_date.columns = [fund]
            except Exception:
                # Best effort: a fund without usable holdings contributes an
                # empty column instead of aborting the whole aggregation.
                fund_holding_date = pd.DataFrame([], columns=[fund])
            frames.append(fund_holding_date)

        # An empty pool previously left ``stock_data`` unbound.
        if frames:
            stock_data = pd.concat(frames, axis=1)
        else:
            stock_data = pd.DataFrame()

        stock_data = stock_data.dropna(how='all')
        stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
        return stock_data_weight
Beispiel #3
0
    def cal_quarter_holding_allfund_quarter(self, quarter_date):
        """Write the average quarterly holding of the low-turnover fund pool.

        Steps:
          1. Take the "基金持仓基准基金池" pool at ``quarter_date``.
          2. Read fund turnover at the half-year date derived from
             ``quarter_date`` shifted by 15 (via the project ``Date``
             helper), discard values below 15, sort ascending and keep the
             first half of the funds, i.e. those with the lowest turnover.
          3. Sum the funds' holding columns for ``quarter_date`` per stock
             (every fund counts equally) and normalise the sums to 1.
          4. Save the result as
             ``<data_weight_path>/<name>/<quarter_date>_QuarterHolding.csv``
             with index ``CODE`` and column ``WEIGHT``.

        Args:
            quarter_date: Quarter label; must be a string because it is
                concatenated into the output file name.

        Returns:
            None. The result is written to disk.
        """
        fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池",
                                                  date=quarter_date)
        halfyear_date = Date().get_last_fund_halfyear_date(
            Date().get_trade_date_offset(quarter_date, 15))

        fund_turnover = Fund().get_fund_turnover()
        fund_turnover = fund_turnover.loc[fund_pool, :]
        # Turnover figures below 15 are treated as missing.
        fund_turnover = fund_turnover.mask(fund_turnover < 15)
        fund_turnover_date = pd.DataFrame(fund_turnover[halfyear_date])
        fund_turnover_date = fund_turnover_date.dropna()
        fund_turnover_date = fund_turnover_date.sort_values(by=[halfyear_date],
                                                            ascending=True)
        # Ascending sort + first half => the lower-turnover half of the pool.
        half_count = len(fund_turnover_date) // 2
        fund_pool = list(fund_turnover_date.index[0:half_count])

        # Collect columns and concatenate once instead of re-copying the
        # growing frame on every iteration.
        frames = []
        for fund in fund_pool:
            try:
                fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
                fund_holding_date = pd.DataFrame(fund_holding[quarter_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date *= 1.0
                fund_holding_date.columns = [fund]
            except Exception:
                # Best effort: funds without usable holdings contribute an
                # empty column rather than aborting the run.
                fund_holding_date = pd.DataFrame([], columns=[fund])
            frames.append(fund_holding_date)

        # Fewer than two funds with turnover data leaves the pool empty,
        # which previously raised UnboundLocalError on ``stock_data``.
        if frames:
            stock_data = pd.concat(frames, axis=1)
        else:
            stock_data = pd.DataFrame()

        stock_data = stock_data.dropna(how='all')
        stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
        stock_data_weight.columns = ["WEIGHT"]
        stock_data_weight /= stock_data_weight.sum()
        stock_data_weight.index.name = "CODE"

        sub_path = os.path.join(self.data_weight_path, self.name)
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(sub_path, exist_ok=True)

        file = os.path.join(sub_path, quarter_date + '_QuarterHolding.csv')
        stock_data_weight.to_csv(file)
Beispiel #4
0
    def cal_all_wind_file(self):
        """Generate one Wind portfolio CSV per available half-year holding.

        For every half-year date between 2015-01-01 and today the holding
        column of ``self.fund_code`` is converted into a portfolio table
        (weights as fractions, a ``Cash`` row holding the remainder to 1, and
        the constant ``CreditTrading``/``Price``/``Direction`` columns) and
        written to ``<wind_port_path>/<port_name>/<port_name>_<date>.csv``.

        Half-year dates that are missing from the holding table are skipped
        silently. Any other failure for a date is reported with ``print`` and
        the loop continues with the next date; previously every failure was
        swallowed without a trace.

        Returns:
            None. Files are written as a side effect.
        """
        date_series = Date().get_normal_date_series("20150101",
                                                    datetime.today(), "S")
        fund_holding = FundHolder().get_fund_holding_halfyear(
            fund=self.fund_code)

        if fund_holding is None:
            # Nothing can be generated without a holding table.
            print("No half-year holding found for %s" % self.fund_code)
            return

        for half_year_date in date_series:
            publish_date = Date().get_trade_date_offset(half_year_date, 0)

            try:
                holding_column = fund_holding[half_year_date]
            except KeyError:
                # No report for this half-year: expected, not an error.
                continue

            try:
                fund_holding_date = pd.DataFrame(holding_column).dropna()

                fund_holding_date.columns = ["Weight"]
                fund_holding_date = fund_holding_date.sort_values(
                    by=['Weight'], ascending=False)
                # Source weights are divided by 100 to obtain fractions.
                fund_holding_date["Weight"] /= 100.0
                # Whatever is not invested in stocks is booked as cash.
                fund_holding_date.loc[
                    'Cash', 'Weight'] = 1 - fund_holding_date["Weight"].sum()

                fund_holding_date.index.name = "Code"
                fund_holding_date["CreditTrading"] = "No"
                fund_holding_date["Date"] = publish_date
                fund_holding_date["Price"] = 0.0
                fund_holding_date["Direction"] = "Long"

                sub_path = os.path.join(self.wind_port_path, self.port_name)
                os.makedirs(sub_path, exist_ok=True)

                file = os.path.join(
                    sub_path, '%s_%s.csv' % (self.port_name, publish_date))
                fund_holding_date.to_csv(file)
            except Exception as e:
                # Keep the best-effort behaviour (one bad date must not stop
                # the others) but make the failure visible.
                print("Failed to generate wind file %s %s: %s" %
                      (self.port_name, publish_date, e))
Beispiel #5
0
    def cal_fund_holder_exposure_halfyear(self, fund_code, beg_date, end_date):
        """Compute a fund's half-year holding exposure and store it as CSV.

        The exposure of each factor is the sum over held stocks of
        ``weight / 100 * stock factor exposure``. Weights are not rescaled to
        add up to 1, so the result reflects the actual (not fully invested)
        position. The COUNTRY factor is then overwritten with the fund's
        ``Stock_Ratio`` factor divided by 100.

        New rows are merged into
        ``<halfyear_exposure_path>/Fund_Holder_Exposure_HalfYear_<fund_code>.csv``.
        When that file does not exist yet, ``beg_date`` is replaced by
        ``"20040101"`` so the whole history is computed.

        Args:
            fund_code: Fund identifier, e.g. ``"000001.OF"``.
            beg_date: First date of the update window, e.g. ``"20170101"``.
            end_date: Last date of the update window, e.g. ``"20190101"``.

        Returns:
            None. Returns early, without writing, when no holding table is
            available for the fund.
        """
        type_list = ['COUNTRY', 'STYLE', 'INDUSTRY']
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        out_file = os.path.join(
            self.halfyear_exposure_path,
            'Fund_Holder_Exposure_HalfYear_%s.csv' % fund_code)

        if not os.path.exists(out_file):
            # First run for this fund: build the full history.
            beg_date = "20040101"

        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='S')
        fund_holding = FundHolder().get_fund_stock_weight_halfyear(fund_code)
        if fund_holding is None:
            return None

        # Only dates that actually have a holding column can be processed.
        date_series = sorted(set(date_series) & set(fund_holding.columns))
        print(date_series)

        # The holding table does not change inside the loop, so the emptiness
        # check is done once instead of re-reading the table for every date.
        holding_is_empty = len(fund_holding) == 0

        frames = []
        for date in date_series:
            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)
            print("Calculate HalfYear Holder Exposure %s %s" %
                  (fund_code, report_date))

            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)

            # Default row: all factors missing for this report date.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if (barra_exposure is not None) and (not holding_is_empty):
                fund_holding_date = pd.DataFrame(fund_holding[report_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date = fund_holding_date.sort_values(
                    by=[report_date], ascending=False)
                fund_holding_date.columns = ["Weight"]
                fund_holding_date /= 100.0
                data = pd.concat([fund_holding_date, barra_exposure], axis=1)
                # Keep only stocks that have both a weight and exposures.
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        # .loc replaces the removed .ix indexer; the product
                        # is summed directly, avoiding assignment on a slice.
                        weighted = data['Weight'] * data[factor_name]
                        exposure_add.loc[report_date,
                                         factor_name] = weighted.sum()

                    country_name = Barra().get_factor_name(
                        ["COUNTRY"])["NAME_EN"].values[0]
                    position = FundFactor().get_fund_factor(
                        "Stock_Ratio",
                        date_list=[report_date],
                        fund_pool=[fund_code])
                    # Country exposure is the reported stock position.
                    exposure_add.loc[
                        report_date,
                        country_name] = position.values[0][0] / 100

            frames.append(exposure_add)

        if frames:
            exposure_new = pd.concat(frames, axis=0)
        else:
            exposure_new = pd.DataFrame([])

        # Merge the new rows into the existing file.
        ####################################################################
        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new

        if len(params) > 0:
            # Fix the column order to the factor-name order.
            params = params[barra_name]
        params.to_csv(out_file)
Beispiel #6
0
    def cal_weight_date(self, date, quarter_date):
        """Estimate the pool's stock weights at ``date``.

        The baseline is the equal-fund average holding of the
        "基金持仓基准基金池" pool at ``quarter_date``, normalised to sum to 1
        (column ``Weight``).

        When ``Date().get_trade_date_diff(quarter_date, date)`` exceeds 30,
        the weights are additionally estimated by regression: the series
        ``self.index_return`` divided by the pool's median stock ratio is
        regressed on the returns of the held stocks over the window
        ``[date - 61 (trade-date offset), date]`` with a positive Lasso.
        ``LassoCV`` (with intercept) selects ``alpha``; a second ``Lasso``
        (without intercept) is fitted with that ``alpha`` and its
        coefficients above 0.0001 are reported in a column named ``date``.

        Args:
            date: Date for which weights are wanted.
            quarter_date: Quarter whose reported holdings are the baseline.

        Returns:
            pandas.DataFrame indexed by stock. It always has the ``Weight``
            column; when the regression ran it also has a column named
            ``date`` with the Lasso coefficients.
        """
        days_diff = Date().get_trade_date_diff(quarter_date, date)
        fund_pool = FundPool().get_fund_pool_code(name="基金持仓基准基金池",
                                                  date=quarter_date)

        # Collect per-fund columns and concatenate once.
        frames = []
        for fund in fund_pool:
            try:
                fund_holding = FundHolder().get_fund_holding_quarter(fund=fund)
                fund_holding_date = pd.DataFrame(fund_holding[quarter_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date *= 1.0
                fund_holding_date.columns = [fund]
            except Exception:
                # Best effort: funds without usable holdings contribute an
                # empty column.
                fund_holding_date = pd.DataFrame([], columns=[fund])
            frames.append(fund_holding_date)

        # An empty pool previously left ``stock_data`` unbound.
        if frames:
            stock_data = pd.concat(frames, axis=1)
        else:
            stock_data = pd.DataFrame()

        stock_data = stock_data.dropna(how='all')
        stock_data_weight = pd.DataFrame(stock_data.sum(axis=1))
        stock_data_weight /= stock_data_weight.sum()
        stock_data_weight.columns = ["Weight"]
        stock_data_weight = stock_data_weight.sort_values(by=['Weight'],
                                                          ascending=False)

        stock_ratio = pd.DataFrame(self.stock_ratio.loc[fund_pool,
                                                        quarter_date])
        # Median stock position of the pool, as a fraction.
        ratio = stock_ratio.median().values[0] / 100.0

        if days_diff > 30:

            # Stock returns and the position-adjusted index return.
            stock_pool = list(stock_data_weight.index)
            beg_date = Date().get_trade_date_offset(date, -61)
            date_series = Date().get_trade_date_series(beg_date, date)
            f_pct = self.index_return / ratio
            s_pct = self.stock_return.loc[date_series, stock_pool]
            # Drop stocks, then dates, that have no return at all.
            s_pct = s_pct.dropna(how='all', axis=1)
            s_pct = s_pct.dropna(how='all')
            f_pct = f_pct.dropna()

            # Prepare the regression data.
            data = pd.concat([f_pct, s_pct], axis=1)
            data = data.loc[beg_date:date, :]
            data = data.dropna(subset=['IndexReturn'])
            data = data.fillna(0.0)
            # Column vector of the target; reshape replaces np.row_stack,
            # which is deprecated in NumPy 2.0.
            y = data['IndexReturn'].values.reshape(-1, 1)
            # Assumes 'IndexReturn' is the first column of ``data``.
            x = data.iloc[:, 1:].values

            model = LassoCV(fit_intercept=True, positive=True)

            # LassoCV picks the best alpha by cross-validation.
            model.fit(x, y)
            print(model.alpha_)
            alpha = model.alpha_

            model = Lasso(alpha=alpha, fit_intercept=False, positive=True)
            model.fit(x, y)

            # Keep only coefficients that are meaningfully above zero.
            selected = model.coef_ > 0.0001
            res = pd.DataFrame(model.coef_[selected],
                               index=s_pct.columns[selected],
                               columns=[date])
            res = res.sort_values(by=[date], ascending=False)
            result = pd.concat([res, stock_data_weight], axis=1)
            result = result.sort_values(by=['Weight'], ascending=False)

        else:
            result = stock_data_weight
        return result
    def cal_fund_holder_exposure_quarter(self, fund, beg_date, end_date):
        """Compute a fund's quarterly holding exposure and store it as CSV.

        The original docstring notes that quarterly data covers the top-ten
        holdings. For each factor the exposure is the sum over held stocks of
        ``weight * stock factor exposure`` divided by 100. The COUNTRY factor
        is then overwritten with the fund's ``Stock_Ratio`` divided by 100.

        New rows are merged into
        ``<data_path_exposure>/fund_holding_exposure_quarter/``
        ``Fund_Holder_Exposure_Quarter_<fund>.csv``.

        Args:
            fund: Fund identifier (string; it is concatenated into the file
                name).
            beg_date: First date of the update window.
            end_date: Last date of the update window.

        Returns:
            None. Returns early, without writing, when the fund has no
            holding table or no holding column inside the window (the latter
            case previously raised UnboundLocalError).
        """
        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='Q')
        fund_holding = FundHolder().get_fund_stock_weight_quarter(fund)
        if fund_holding is None:
            return None

        # Only dates that actually have a holding column can be processed.
        date_series = sorted(set(date_series) & set(fund_holding.columns))
        if not date_series:
            # Nothing to add for this window.
            return None

        # The factor names do not depend on the date: query them once.
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

        frames = []
        for date in date_series:
            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)

            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)

            print(
                "########## Calculate Quarter Holder Exposure %s %s ##########"
                % (fund, report_date))

            # Default row: all factors missing for this report date.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if barra_exposure is not None:
                fund_holding_date = pd.DataFrame(fund_holding[report_date])
                fund_holding_date = fund_holding_date.dropna()
                fund_holding_date = fund_holding_date.sort_values(
                    by=[report_date], ascending=False)
                fund_holding_date.columns = ["Weight"]
                data = pd.concat([fund_holding_date, barra_exposure], axis=1)
                # Keep only stocks that have both a weight and exposures.
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        # .loc replaces the removed .ix indexer; the product
                        # is summed directly, avoiding assignment on a slice.
                        weighted = data['Weight'] * data[factor_name]
                        exposure_add.loc[report_date,
                                         factor_name] = weighted.sum() / 100.0

                    country_name = Barra().get_factor_name(
                        ["COUNTRY"])["NAME_EN"].values[0]
                    position = FundFactor().get_fund_factor(
                        "Stock_Ratio",
                        date_list=[report_date],
                        fund_pool=[fund])
                    position = position.values[0][0]
                    # Country exposure is the reported stock position.
                    exposure_add.loc[report_date, country_name] = position / 100

            frames.append(exposure_add)

        exposure_new = pd.concat(frames, axis=0)

        # Merge the new rows into the existing file.
        ####################################################################
        out_path = os.path.join(self.data_path_exposure,
                                'fund_holding_exposure_quarter')
        out_file = os.path.join(
            out_path, 'Fund_Holder_Exposure_Quarter_' + fund + '.csv')

        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = FactorOperate().pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new
        params.to_csv(out_file)
    def get_data(self):
        """Load the industry-level and stock-level fund holdings.

        Results are stored on ``self.industry_data`` and ``self.stock_data``.
        """
        industry_holdings = FundHolder().get_fund_holding_industry_all()
        self.industry_data = industry_holdings

        stock_holdings = FundHolder().get_fund_holding_all()
        self.stock_data = stock_holdings
Beispiel #9
0
    def cal_fund_holder_exposure(self, fund, beg_date, end_date):
        """Compute a fund's holding exposure per half-year and store it as CSV.

        For every half-year date in ``[beg_date, end_date]`` the fund's
        reported holdings are combined with the stock factor exposures of the
        month-end trade date. Each factor's exposure is the sum over held
        stocks of ``Weight * stock factor exposure`` divided by 100.

        New rows are merged into ``Fund_Holder_Exposure_<fund>.csv`` inside
        the directory returned by
        ``Parameter().get_read_file(self.holder_exposure_name)``.

        Args:
            fund: Fund identifier (string; it is concatenated into the file
                name).
            beg_date: First date of the update window.
            end_date: Last date of the update window.

        Returns:
            None. Returns early, without writing, when the window contains no
            half-year date (this case previously raised UnboundLocalError).
        """
        # Computed once every half year.
        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='S')
        if len(date_series) == 0:
            # Nothing to add for this window.
            return None

        # The factor names do not depend on the date: query them once.
        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)

        frames = []
        for date in date_series:
            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)

            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)
            fund_holding = FundHolder().get_fund_holding_report_date_fund(
                fund, report_date)
            print("########## Calculate Holder Exposure %s %s ##########" %
                  (fund, report_date))

            # Default row: all factors missing for this report date.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if (barra_exposure is not None) and (fund_holding is not None):
                fund_holding = fund_holding['Weight']
                data = pd.concat([fund_holding, barra_exposure], axis=1)
                # Keep only stocks that have both a weight and exposures.
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        # .loc replaces the removed .ix indexer; the product
                        # is summed directly, avoiding assignment on a slice.
                        weighted = data['Weight'] * data[factor_name]
                        exposure_add.loc[report_date,
                                         factor_name] = weighted.sum() / 100.0

            frames.append(exposure_add)

        exposure_new = pd.concat(frames, axis=0)

        # Merge the new rows into the existing file.
        ####################################################################
        out_path = Parameter().get_read_file(self.holder_exposure_name)
        out_file = os.path.join(out_path,
                                'Fund_Holder_Exposure_' + fund + '.csv')

        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new
        params.to_csv(out_file)
    def get_fund_holder_data(self, quarter_date, quarter_last_date):
        """Build the per-stock table of fund-held shares, value and inflow.

        For both quarters the shares held by all funds are summed per stock,
        using only reports whose ``PublishDate`` is not later than the
        quarter date shifted by 20 (via ``Date().get_trade_date_offset``).

        Args:
            quarter_date: Report date of the current quarter.
            quarter_last_date: Report date of the previous quarter.

        Returns:
            pandas.DataFrame indexed by stock code with the columns
            ``ShareQuarter``, ``ShareQuarterLast``, ``PriceQuarter``,
            ``PriceQuarterLast``, ``PriceMean``, ``Adjust``, ``Industry``,
            ``ShareQuarterLastAdjust``, ``MvQuarter``, ``MvQuarterLast`` and
            ``Inflow``. The last three value columns are divided by 1e8.
            Stocks without an adjust ratio or industry are dropped; other
            missing values are filled with 0.
        """
        # date
        quarter_trade_date = Date().get_trade_date_offset(quarter_date, 0)
        quarter_last_trade_date = Date().get_trade_date_offset(
            quarter_last_date, 0)
        print(quarter_date, quarter_last_date)
        print(quarter_trade_date, quarter_last_trade_date)

        # share
        data = FundHolder().get_fund_holding_stock_all()

        def total_share(report_date):
            """Sum held shares per stock for one report date."""
            rows = data[data.ReportDate == report_date]
            cutoff = Date().get_trade_date_offset(report_date, 20)
            rows = rows[rows.PublishDate <= cutoff]
            # Select the column before aggregating: summing every column of
            # the table is wasteful and can fail on non-numeric columns.
            return pd.DataFrame(
                rows.groupby(by=['StockCode'])['Share'].sum())

        quarter_share = total_share(quarter_date)
        quarter_last_share = total_share(quarter_last_date)

        # price
        adjust_factor = Stock().read_factor_h5("AdjustFactor")
        price_unadjust = Stock().read_factor_h5("Price_Unadjust")
        # Ratio of the adjust factors between the two quarter ends.
        quarter_adjust = adjust_factor[quarter_trade_date] / adjust_factor[
            quarter_last_trade_date]
        quarter_price = price_unadjust[quarter_trade_date]
        quarter_price_last = price_unadjust[quarter_last_trade_date]
        # Mean of the current price and the previous price divided by the
        # adjust ratio.
        average_price = (quarter_price +
                         quarter_price_last / quarter_adjust) / 2.0

        # industry (latest available column)
        industry = Stock().read_factor_h5("industry_citic1")
        industry_date = pd.DataFrame(industry[industry.columns[-1]])
        industry_date.columns = ['Industry']

        # concat
        result = pd.concat([
            quarter_share, quarter_last_share, quarter_price,
            quarter_price_last, average_price, quarter_adjust, industry_date
        ],
                           axis=1)
        result.columns = [
            'ShareQuarter', 'ShareQuarterLast', 'PriceQuarter',
            'PriceQuarterLast', 'PriceMean', 'Adjust', 'Industry'
        ]

        result = result.dropna(subset=['Adjust', 'Industry'])
        result = result.fillna(0.0)

        # cal
        result['ShareQuarterLastAdjust'] = result['ShareQuarterLast'] * result[
            'Adjust']
        result['MvQuarter'] = result['ShareQuarter'] * result['PriceQuarter']
        result['MvQuarterLast'] = result['ShareQuarterLast'] * result[
            'PriceQuarterLast']
        # Inflow: change in adjusted share count valued at the mean price.
        result['Inflow'] = (
            result['ShareQuarter'] -
            result['ShareQuarterLastAdjust']) * result['PriceMean']

        for value_column in ('MvQuarter', 'Inflow', 'MvQuarterLast'):
            result[value_column] /= 100000000.0

        return result
 def update_data(self):
     """ Refresh the data this class needs. """
     # The three project loaders are invoked in this fixed order; what each
     # one loads is defined outside this block.
     Date().load_trade_date_series("D")
     Stock().load_h5_primary_factor()
     FundHolder().load_fund_holding_stock()