def get_stock_characteristic_size(self):
        """Break the fund's stock holdings down by index membership.

        Reads ``self.fund_hold_stock`` (relabelled to one ``FundWeight``
        column) and the constituent weights of 000300.SH and 000905.SH on
        ``self.last_trade_date``.  For each index, the fund weights of the
        stocks that also carry an index weight are summed and divided by the
        fund's total stock weight; whatever is left is reported as "other".

        Returns:
            pd.DataFrame: one row per bucket (CSI 300 constituents, CSI 500
            constituents, other) with the share in column ``数值`` plus two
            descriptive label columns.
        """
        date = self.last_trade_date

        fund_all_stock = self.fund_hold_stock.copy()
        fund_all_stock.columns = ['FundWeight']
        stock_sum_all = fund_all_stock['FundWeight'].sum()

        def constituent_share(index_code, weight_column):
            # Share of the fund's total stock weight held in one index.
            index_weight = Index().get_weight_date(index_code=index_code,
                                                   date=date)
            index_weight.columns = [weight_column]
            # Align both frames on their index, then keep only the rows that
            # have a value in both columns.
            joined = pd.concat([fund_all_stock, index_weight], axis=1).dropna()
            return joined['FundWeight'].sum() / stock_sum_all

        stock_300_weight = constituent_share("000300.SH", '300Weight')
        stock_500_weight = constituent_share("000905.SH", '500Weight')

        # Everything that is in neither index.
        other_weight = 1 - stock_300_weight - stock_500_weight

        stock_characteristic_size = pd.DataFrame(
            [stock_300_weight, stock_500_weight, other_weight],
            index=['沪深300成分股权重', '中证500成分股权重', '其他成分股权重'],
            columns=['数值'])
        stock_characteristic_size['持股特征(%s)' % self.last_trade_date] = '市值分布'
        stock_characteristic_size['具体表现'] = stock_characteristic_size.index
        return stock_characteristic_size
Beispiel #2
0
    def get_benchmark_weight_date(self, date):
        """Return the benchmark's constituent weights on ``date``.

        The frame obtained from ``Index().get_weight_date`` for
        ``self.benchmark_code`` is relabelled to a single ``BenchWeight``
        column before it is returned.
        """
        weight_frame = Index().get_weight_date(date=date,
                                               index_code=self.benchmark_code)
        weight_frame.columns = ['BenchWeight']
        return weight_frame
Beispiel #3
0
    def get_index_pct(self):
        """Return the index close and its period-over-period change.

        Data for ``self.index_code`` is fetched between ``self.beg_date_pre``
        and ``self.end_date``; a ``PCT`` column is derived from ``CLOSE`` and
        the two columns are relabelled.  Rows containing missing values (such
        as the first row, which has no previous close) are dropped.
        """
        quotes = Index().get_index_factor(self.index_code,
                                          self.beg_date_pre, self.end_date)
        quotes['PCT'] = quotes['CLOSE'].pct_change()
        # NOTE(review): relabelling to two names assumes the fetched frame
        # held only CLOSE before PCT was added - confirm against the helper.
        quotes.columns = ['指数收盘', '指数涨跌幅']
        return quotes.dropna()
Beispiel #4
0
    def cal_index_excess_pct(self):
        """Compute the index return in excess of the risk-free rate.

        The close of 000985.CSI (without its last row) is turned into a
        percentage return, the daily risk-free rate is subtracted, and the
        result plus its running sum is written to
        ``<self.data_path>/factor_return/FactorReturn_Market.csv``.
        """
        name = "Market"

        close = Index().get_index_factor("000985.CSI", attr=['CLOSE'])
        # The last row is excluded before returns are computed.
        returns = close.iloc[0:-1, :].pct_change()
        returns.columns = ['StockIndexReturn']
        index_return = returns['StockIndexReturn'] * 100

        risk_free = Macro().get_daily_risk_free_rate()['RiskFreeRate']
        excess = index_return.sub(risk_free, axis='index').dropna()

        excess_frame = pd.DataFrame(excess)
        excess_frame.columns = [name]
        excess_frame['CumSumReturn'] = excess_frame[name].cumsum()

        out_file = os.path.join(self.data_path, 'factor_return', 'FactorReturn_%s.csv' % name)
        excess_frame.to_csv(out_file)
Beispiel #5
0
    def filter_fund_pool(self, index_code, index_name, end_date,
                         track_error_up):
        """Select the funds that closely track one index and save them to Excel.

        The annualised tracking error of every fund against ``index_code`` is
        computed over the 250 trading days ending at ``end_date``.  A fund is
        kept when it has more than ``self.data_min_length`` observations, a
        tracking error below ``track_error_up``, a benchmark text containing
        ``index_name``, a listing date before the window start, a full name
        containing neither "交易型开放式指数" nor "联接", and is flagged
        'A类基金' by ``Fund().if_a_fund``.

        The result is written to
        ``<self.data_path>/filter_fund_pool/基金最终筛选池_<index_name>.xlsx``.

        Args:
            index_code: index code, e.g. ``'000300.SH'``.
            index_name: index name, e.g. ``'沪深300'``; matched against the
                fund benchmark text and used in the output file name.
            end_date: last day of the window, e.g. ``"20181231"``.
            track_error_up: upper bound on tracking error, e.g. ``0.03``.
        """
        beg_date = Date().get_trade_date_offset(end_date, -250)

        # Load fund NAVs and the index close.
        fund_nav = Fund().get_fund_factor("Repair_Nav")
        index_close = Index().get_index_factor(index_code, attr=['CLOSE'])
        index_close.columns = [index_code]
        result = pd.DataFrame([],
                              index=fund_nav.columns,
                              columns=['跟踪误差', '数据长度'])

        # Tracking error of each fund over the window.
        fund_nav = fund_nav.loc[index_close.index, :]
        fund_pct = fund_nav.pct_change()
        index_pct = index_close.pct_change()
        index_pct = index_pct[index_code]
        fund_excess_pct = fund_pct.sub(index_pct, axis='index')
        fund_excess_pct_period = fund_excess_pct.loc[beg_date:end_date, :]
        result.loc[:, "数据长度"] = fund_excess_pct_period.count()
        result.loc[:, "跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

        # Filter on data length and tracking error.
        result = result.dropna()
        result = result[result['数据长度'] > self.data_min_length]
        result = result[result['跟踪误差'] < track_error_up]

        # Attach basic fund information and filter on it.
        data_pd = Fund().get_wind_fund_info()
        data_pd = data_pd[[
            'BenchMark', 'Name', 'FullName', 'SetupDate', 'InvestType'
        ]]
        data_pd.columns = ['基金基准', '基金简称', '基金全称', '上市日期', '基金类型']
        data = pd.concat([data_pd, result], axis=1)
        data = data.dropna()
        data = data[data["基金基准"].map(lambda x: index_name in x)]
        data = data[data["上市日期"] < beg_date]
        data = data[data["基金全称"].map(lambda x: "交易型开放式指数" not in x)]
        data = data[data["基金全称"].map(lambda x: "联接" not in x)]
        data['A类基金'] = data['基金简称'].map(Fund().if_a_fund)
        data = data[data['A类基金'] == 'A类基金']

        # Write the result.
        out_path = os.path.join(self.data_path, "filter_fund_pool")
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        file_name = os.path.join(out_path, '基金最终筛选池_' + index_name + '.xlsx')

        sheet_name = "基金筛选池"
        num_format_pd = pd.DataFrame([],
                                     columns=data.columns,
                                     index=['format'])
        # .loc replaces the .ix indexer, which current pandas no longer has.
        num_format_pd.loc['format', :] = '0.00'
        num_format_pd.loc['format', '跟踪误差'] = '0.00%'
        num_format_pd.loc['format', '数据长度'] = '0'

        excel = WriteExcel(file_name)
        worksheet = excel.add_worksheet(sheet_name)
        excel.write_pandas(data,
                           worksheet,
                           begin_row_number=0,
                           begin_col_number=1,
                           num_format_pd=num_format_pd,
                           color="red",
                           fillna=True)
        # Close the workbook explicitly, as the other Excel writers in this
        # code base do after write_pandas.
        excel.close()
Beispiel #6
0
    def calculate_fund_factor(self, index_code, index_name, end_date):
        """Compute tracking error, excess return and information ratio per fund.

        Funds are read from the pool file
        ``<self.data_path>/filter_fund_pool/基金最终筛选池_<index_name>.xlsx``.
        The metrics are computed against ``index_code`` over the
        ``self.data_length`` trading days ending at ``end_date``.  Funds
        listed on or after the window start, with a Wind ``netasset_total``
        (divided by 1e8) of 0.45 or less, or with no more than
        ``self.data_min_length`` observations are excluded.

        The result, sorted by information ratio (descending), is written to
        ``<self.data_path>/cal_fund_factor/<index_name>/基金指标_<index_name>_<end_date>.xlsx``.

        Args:
            index_code: index code, e.g. ``'000905.SH'``.
            index_name: index name, e.g. ``'中证500'``; used in file names.
            end_date: last day of the window, e.g. ``'20151231'``.
        """
        beg_date = Date().get_trade_date_offset(end_date, -self.data_length)

        # Load the fund pool, fund NAVs and the index close.
        file = os.path.join(self.data_path, 'filter_fund_pool',
                            '基金最终筛选池_' + index_name + '.xlsx')
        # No ``encoding`` argument: current pandas read_excel rejects it.
        fund_code = pd.read_excel(file, index_col=[1])
        fund_code['上市日期'] = fund_code['上市日期'].map(str)

        fund_nav = Fund().get_fund_factor("Repair_Nav")
        index_close = Index().get_index_factor(index_code, attr=['CLOSE'])
        index_close.columns = [index_code]

        # Drop recently listed funds and download fund size.
        fund_code = fund_code.loc[:, ['上市日期', '基金全称', '基金简称']]
        fund_code = fund_code[fund_code['上市日期'] < beg_date]

        fund_code_str = ','.join(fund_code.index)
        fund_asset = w.wss(fund_code_str, "netasset_total",
                           "unit=1;tradeDate=" + str(end_date))
        fund_asset = pd.DataFrame(fund_asset.Data,
                                  index=['基金规模'],
                                  columns=fund_asset.Codes).T
        fund_asset['基金规模'] /= 100000000.0
        fund_asset['基金规模'] = fund_asset['基金规模'].round(2)
        fund_asset = fund_asset[fund_asset['基金规模'] > 0.45]
        fund_info = pd.concat([fund_code, fund_asset], axis=1)
        fund_info = fund_info.dropna()

        # Metrics over the window.
        result = pd.DataFrame([], index=fund_code.index, columns=['跟踪误差'])
        # reindex keeps the old .ix behaviour of filling labels that are
        # missing from fund_nav with NaN instead of raising.
        fund_nav = fund_nav.reindex(index=index_close.index,
                                    columns=fund_code.index)
        fund_pct = fund_nav.pct_change()
        index_pct = index_close.pct_change()
        index_pct = index_pct[index_code]
        fund_excess_pct = fund_pct.sub(index_pct, axis='index')
        fund_excess_pct_period = fund_excess_pct.loc[beg_date:end_date, :]
        fund_nav_period = fund_nav.loc[beg_date:end_date, :]
        index_close_period = index_close.loc[beg_date:end_date, :]
        result.loc[:, "数据长度"] = fund_excess_pct_period.count()
        result.loc[:, "跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

        # Fund return over the window, compounded through summed log returns.
        fund_return_log = np.log(fund_nav_period.pct_change() +
                                 1.0).cumsum().iloc[-1, :]
        fund_return = np.exp(fund_return_log) - 1
        last_date_close = index_close_period.iloc[len(fund_nav_period) - 1, :]
        first_date_close = index_close_period.iloc[0, :]
        result.loc[:, "基金涨跌"] = fund_return
        result.loc[:, "指数涨跌"] = (last_date_close / first_date_close -
                                 1.0).values[0]
        result.loc[:, "超额收益"] = result["基金涨跌"] - result["指数涨跌"]
        result.loc[:, "信息比率"] = result["超额收益"] / result["跟踪误差"]

        result = result[result['数据长度'] > self.data_min_length]
        result = pd.concat([fund_info, result], axis=1)
        result = result.sort_values(by=['信息比率'], ascending=False)
        # After dropna no missing values remain, so no fill step is needed.
        result = result.dropna()

        # Write to Excel.
        out_path = os.path.join(self.data_path, "cal_fund_factor", index_name)
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        file_name = os.path.join(
            out_path, '基金指标_' + index_name + '_' + end_date + '.xlsx')

        num_format_pd = pd.DataFrame([],
                                     columns=result.columns,
                                     index=['format'])
        num_format_pd.loc['format', :] = '0.00%'
        num_format_pd.loc['format', '数据长度'] = '0'
        num_format_pd.loc['format', '信息比率'] = '0.00'
        num_format_pd.loc['format', '基金规模'] = '0.00'

        sheet_name = "基金指标"
        excel = WriteExcel(file_name)
        worksheet = excel.add_worksheet(sheet_name)
        excel.write_pandas(result,
                           worksheet,
                           begin_row_number=0,
                           begin_col_number=0,
                           num_format_pd=num_format_pd,
                           color="red",
                           fillna=True)
        # Close the workbook explicitly, as the other Excel writers in this
        # code base do after write_pandas.
        excel.close()
Beispiel #7
0
def _write_attribution_excel(frame, num_format_pd, save_path, file_name,
                             sheet_name):
    """Write ``frame`` to a single-sheet workbook at ``file_name`` and close it."""
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(frame,
                       worksheet,
                       begin_row_number=0,
                       begin_col_number=1,
                       num_format_pd=num_format_pd,
                       color="red",
                       fillna=True)
    excel.close()


def AttributeMfctedaFund(index_code_ratio, fund_code, index_code, fund_name,
                         beg_date, end_date, fund_id, path, type,
                         mg_fee_ratio):
    """
    Split a fund's daily NAV change over a period into its sources.

    For every trading day the day's asset P&L (previous net asset value times
    the fund return) is decomposed into stock, new-stock, intraday-trading,
    fee and fixed-income/other parts; the stock part is further split into
    benchmark, excess, timing and selection P&L.  Two workbooks are written
    under ``<path>/<fund_name>/整体``: the daily table ("归因_...") and the
    period summary ("归因汇总_...").

    Example arguments:
        index_code_ratio = 0.95
        fund_code = '162216.OF'
        index_code = '000905.SH'
        fund_name = '泰达中证500指数分级'
        beg_date = '20180101'
        end_date = '20180731'
        fund_id = 38
        path = 'C:\\Users\\doufucheng\\OneDrive\\Desktop\\data\\'
        type = '专户'

    ``mg_fee_ratio`` is passed through to ``CalStockReturnDaily``.
    """

    # Fund adjusted return
    ################################################################################################
    beg_date = Date().get_trade_date_offset(beg_date, -0)

    if type == "专户":
        fund_pct = MfcData().get_fund_nav_adjust(
            fund_name,
            Date().get_trade_date_offset(beg_date, -2), end_date)
        fund_pct['基金涨跌幅'] = fund_pct['累计复权净值'].pct_change()
    else:
        fund_pct = MfcData().get_mfcteda_public_fund_pct_wind(
            fund_code, beg_date, end_date)
        fund_pct.columns = ['基金涨跌幅']

    # Index return and holdings data
    # NAV = stock assets + bond assets + fund assets + repo assets
    #       + cash balance + accumulated receivables - accumulated payables
    # Receivables / payables are items such as daily subscriptions and
    # redemptions, accruals and trading / management fees not yet settled
    # to cash.
    # The split is not done on the daily NAV itself; instead the day's total
    # floating P&L = previous day's net asset value * the day's adjusted
    # fund return.
    ################################################################################################
    index_pct = Index().get_index_factor(
        index_code,
        Date().get_trade_date_offset(beg_date, -1), end_date, ['CLOSE'])
    index_pct = index_pct.pct_change()
    index_pct.columns = ['指数涨跌幅']
    fund_asset = MfcData().get_fund_asset_period(fund_id, beg_date, end_date)
    close_unadjust = Stock().get_factor_h5("Price_Unadjust", None,
                                           "primary_mfc")
    adjust_factor = Stock().get_factor_h5("AdjustFactor", None, "primary_mfc")
    fund_asset['股票资产-汇总'] = fund_asset['股票资产']

    data = pd.concat([fund_pct, index_pct, fund_asset], axis=1)
    data = data.dropna(subset=['基金涨跌幅', '指数涨跌幅'])

    data['昨日净值'] = data['净值'].shift(1)
    data['昨日基金份额'] = data['基金份额'].shift(1)
    data['昨日单位净值'] = data['单位净值'].shift(1)

    # Daily new-stock and stock P&L, at close prices and at average prices
    ################################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    for date in date_series:

        new_stock_return, new_stock_asset = CalNewStockReturnDaily(
            fund_name,
            date,
            path,
            close_unadjust,
            adjust_factor,
            cal_type="close")
        stock_return, mg_fee, trade_fee, stock_asset = CalStockReturnDaily(
            fund_name,
            date,
            path,
            close_unadjust,
            adjust_factor,
            mg_fee_ratio,
            cal_type="close")
        data.loc[date, '新股资产'] = new_stock_asset
        data.loc[date, '股票资产'] = stock_asset
        data.loc[date, '新股盈亏'] = new_stock_return
        data.loc[date, '股票盈亏'] = stock_return
        data.loc[date, '管理托管费用'] = mg_fee
        data.loc[date, '交易印花费用'] = trade_fee

        new_stock_return, new_stock_asset = CalNewStockReturnDaily(
            fund_name,
            date,
            path,
            close_unadjust,
            adjust_factor,
            cal_type="average")
        stock_return, mg_fee, trade_fee, stock_asset = CalStockReturnDaily(
            fund_name,
            date,
            path,
            close_unadjust,
            adjust_factor,
            mg_fee_ratio,
            cal_type="average")
        data.loc[date, '新股盈亏-TradePrice'] = new_stock_return
        data.loc[date, '股票盈亏-TradePrice'] = stock_return

    # Rows added by the loop for dates without fund/index returns are dropped.
    data = data.dropna(subset=['基金涨跌幅', '指数涨跌幅'])
    # NOTE(review): '当日股票总盈亏金额' is not created in this function; it is
    # presumably supplied by get_fund_asset_period - confirm.
    data[['新股盈亏', '当日股票总盈亏金额']] = data[['新股盈亏', '当日股票总盈亏金额']].fillna(0.0)
    data = data[data['股票资产'] > 0.0]
    data['股票仓位'] = data['股票资产'] / data['净值']
    data['昨日股票仓位'] = data['股票仓位'].shift(1)

    # Asset P&L = stock P&L + new-stock P&L + bond/other
    #             + custody/management fee + trading/stamp fee
    ################################################################################################
    cols = ['管理托管费用', '交易印花费用', '股票盈亏', '新股盈亏']
    data[cols] = data[cols].fillna(0.0)
    data['汇总盈亏'] = (data['管理托管费用'] + data['交易印花费用'] +
                    data['股票盈亏'] + data['新股盈亏'])
    data['日内交易盈亏'] = (data['股票盈亏'] - data['股票盈亏-TradePrice'] +
                      data['新股盈亏'] - data['新股盈亏-TradePrice'])
    data['资产盈亏'] = data['基金涨跌幅'] * data['昨日净值']
    data['固收其他盈亏'] = data['资产盈亏'] - data['汇总盈亏'] - data['日内交易盈亏']
    data['昨日股票资产'] = data['股票资产'].shift(1)
    data['股票涨跌幅'] = data['股票盈亏'] / data['昨日股票资产']

    # Stock P&L = benchmark P&L + excess P&L
    ################################################################################################
    data['基准盈亏'] = data['指数涨跌幅'] * data['昨日净值'] * index_code_ratio
    data['超额盈亏'] = (data['昨日股票仓位'] * data['股票涨跌幅'] * data['昨日净值'] -
                    data['基准盈亏'])

    # Excess P&L = timing (asset allocation) + stock selection
    ################################################################################################
    data['择时盈亏'] = ((data['昨日股票仓位'] - index_code_ratio) *
                    data['指数涨跌幅'] * data['昨日净值'])
    data['选股盈亏'] = (data['昨日股票仓位'] *
                    (data['股票涨跌幅'] - data['指数涨跌幅']) * data['昨日净值'])
    data['全仓选股盈亏'] = (data['股票涨跌幅'] - data['指数涨跌幅']) * data['昨日净值']

    # Express every P&L item per fund share (in unit-NAV terms)
    ################################################################################################
    data['净值-资产盈亏'] = data['资产盈亏'] / data['昨日基金份额']
    data['净值-管理托管费用'] = data['管理托管费用'] / data['昨日基金份额']
    data['净值-交易印花费用'] = data['交易印花费用'] / data['昨日基金份额']
    data['净值-股票盈亏'] = data['股票盈亏'] / data['昨日基金份额']
    data['净值-新股盈亏'] = data['新股盈亏'] / data['昨日基金份额']
    data['净值-固收其他盈亏'] = data['固收其他盈亏'] / data['昨日基金份额']
    data['净值-日内交易盈亏'] = data['日内交易盈亏'] / data['昨日基金份额']
    data['净值-基准盈亏'] = data['基准盈亏'] / data['昨日基金份额']
    data['净值-超额盈亏'] = data['超额盈亏'] / data['昨日基金份额']
    data['净值-择时盈亏'] = data['择时盈亏'] / data['昨日基金份额']
    data['净值-选股盈亏'] = data['选股盈亏'] / data['昨日基金份额']
    data['净值-全仓选股盈亏'] = data['全仓选股盈亏'] / data['昨日基金份额']

    index = [
        '净值-资产盈亏', '净值-股票盈亏', '净值-新股盈亏', '净值-固收其他盈亏', '净值-日内交易盈亏', '净值-管理托管费用',
        '净值-交易印花费用', '净值-基准盈亏', '净值-超额盈亏', '净值-择时盈亏', '净值-选股盈亏', '净值-全仓选股盈亏'
    ]

    # Convert to percentage returns over the period
    ################################################################################################
    data = data.dropna(subset=['昨日单位净值'])
    nav = data.loc[data.index[0], '昨日单位净值']
    pct = data['净值-资产盈亏'].sum() / nav
    result = pd.DataFrame([], columns=['净值变化', '百分比', '收益率'], index=index)

    result.loc[index, '净值变化'] = data.loc[:, index].sum()
    # Each item's share of the total asset P&L, then scaled to a return.
    result.loc[index, '百分比'] = (result.loc[index, '净值变化'] /
                                result.loc['净值-资产盈亏', '净值变化'])
    result.loc[index, '收益率'] = result.loc[index, '百分比'] * pct

    # Annualised return, start date, end date
    ################################################################################################
    result.index = [
        '基金整体', '股票部分', '新股部分', '固收+其他部分', "日内交易部分", '管理托管', '交易印花', '股票基准',
        '股票超额', '股票择时', '股票选股', '全仓股票选股'
    ]
    days = (datetime.strptime(end_date, '%Y%m%d') -
            datetime.strptime(beg_date, '%Y%m%d')).days
    result.loc[:, '年化收益'] = result.loc[:, '收益率'].map(
        lambda x: (x + 1)**(365 / days) - 1.0)
    result.loc['股票仓位', :] = data['股票仓位'].mean()
    result.loc['开始时间', :] = data.index[0]
    result.loc['结束时间', :] = data.index[-1]

    save_path = os.path.join(path, fund_name, "整体")
    period_tag = str(data.index[0]) + '_' + str(data.index[-1])

    # Write the daily decomposition
    ####################################################################################################################
    # .loc replaces the .ix indexer, which current pandas no longer has.
    num_format_pd = pd.DataFrame([], columns=data.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format',
                      ['基金涨跌幅', '指数涨跌幅', '股票仓位', '昨日股票仓位', '股票涨跌幅']] = '0.00%'
    num_format_pd.loc['format', [
        '单位净值', '昨日单位净值', '净值-管理托管费用', '净值-交易印花费用', '净值-股票盈亏', '净值-新股盈亏',
        '净值-固收其他盈亏', '净值-基准盈亏', '净值-择时盈亏', '净值-选股盈亏', '净值-资产盈亏', '净值-全仓选股盈亏'
    ]] = '0.0000'

    file_name = os.path.join(
        save_path, "归因_" + fund_name + '_' + period_tag + ".xlsx")
    _write_attribution_excel(data, num_format_pd, save_path, file_name,
                             fund_name)

    # Write the period summary
    ####################################################################################################################
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', ['净值变化']] = '0.0000'

    file_name = os.path.join(
        save_path, "归因汇总_" + fund_name + '_' + period_tag + ".xlsx")
    _write_attribution_excel(result, num_format_pd, save_path, file_name,
                             fund_name)
Beispiel #8
0
    def back_test_timing_factor(self, factor_name, index_code):
        """Back-test a timing signal against the next period's index return.

        The exposure frame from ``self.get_factor_exposure(factor_name)``
        (columns ``RawTimer`` and ``Timer`` are used) is joined with the
        return of ``index_code``.  Short-only, long-only and long-short
        portfolio returns are the next-period index return multiplied by the
        negative part, the positive part and the whole of ``Timer``.

        Outputs, all under ``self.data_path``:
            * ``factor_picture/<factor_name>fig.png`` - cumulative returns.
            * ``factor_backtest/<factor_name>_Result.csv`` - summary statistics.
            * ``factor_backtest/<factor_name>_Return.csv`` - the full table.

        Win ratios are reported as NaN when the signal never takes the
        corresponding side, instead of raising ``ZeroDivisionError``.
        """
        data = self.get_factor_exposure(factor_name)
        index_pct = Index().get_index_factor(index_code, attr=['CLOSE'])
        index_pct = index_pct.pct_change()
        index_pct.columns = ['IndexReturn']

        data = pd.concat([data, index_pct['IndexReturn']], axis=1)
        data = data.dropna(subset=['RawTimer', 'Timer', 'IndexReturn'])
        data['IndexNextReturn'] = data['IndexReturn'].shift(-1)

        # Positive part / negative part of the signal.
        data['LongTimer'] = data['Timer'].clip(lower=0)
        data['ShortTimer'] = data['Timer'].clip(upper=0)
        data['SPortNextReturn'] = data['IndexNextReturn'] * data['ShortTimer']
        data['LPortNextReturn'] = data['IndexNextReturn'] * data['LongTimer']
        data['LSPortNextReturn'] = data['IndexNextReturn'] * data['Timer']

        data['SPortCumReturn'] = data['SPortNextReturn'].cumsum()
        data['LPortCumReturn'] = data['LPortNextReturn'].cumsum()
        data['LSPortCumReturn'] = data['LSPortNextReturn'].cumsum()
        data['IndexCumReturn'] = data['IndexReturn'].cumsum()

        col_output = [
            "SPortCumReturn", "LPortCumReturn", "LSPortCumReturn",
            "IndexCumReturn"
        ]
        data_plot = data[col_output]
        ax = data_plot.plot()
        fig = ax.get_figure()
        file = os.path.join(self.data_path, 'factor_picture',
                            factor_name + 'fig.png')
        fig.savefig(file)

        result = pd.DataFrame([], columns=[factor_name])

        timer = data['Timer']
        next_return = data['IndexNextReturn']
        is_long = timer > 0
        is_short = timer < 0
        is_flat = timer == 0

        pos_corr = next_return.corr(timer)
        raw_corr = next_return.corr(data['RawTimer'])
        mean_zero = next_return[is_flat].mean()

        # Short-side figures are sign-flipped so that a profit is positive.
        mean_positive_profit = next_return[is_long & (next_return > 0)].mean()
        mean_positive_loss = next_return[is_long & (next_return <= 0)].mean()
        mean_negative_loss = -next_return[is_short &
                                          (next_return >= 0)].mean()
        mean_negative_profit = -next_return[is_short &
                                            (next_return < 0)].mean()

        number_positive = int(is_long.sum())
        number_negative = int(is_short.sum())
        number_zero = int(is_flat.sum())

        number_positive_profit = int((is_long & (next_return > 0)).sum())
        number_negative_profit = int((is_short & (next_return < 0)).sum())

        # Guard the integer divisions: a signal that is never long (or never
        # short) has no defined win ratio.
        nan = float('nan')
        positive_wining_ratio = (number_positive_profit / number_positive
                                 if number_positive else nan)
        positive_profit_loss_ratio = -mean_positive_profit / mean_positive_loss
        negative_wining_ratio = (number_negative_profit / number_negative
                                 if number_negative else nan)
        negative_profit_loss_ratio = -mean_negative_profit / mean_negative_loss

        result.loc['开始时间', factor_name] = data.index[0]
        result.loc['结束时间', factor_name] = data.index[-1]
        result.loc['仓位相关系数', factor_name] = pos_corr
        result.loc['原始相关系数', factor_name] = raw_corr
        result.loc['多头收益', factor_name] = mean_positive_profit
        result.loc['多头损失', factor_name] = mean_positive_loss
        result.loc['空头收益', factor_name] = mean_negative_profit
        result.loc['空头损失', factor_name] = mean_negative_loss
        result.loc['空仓收益', factor_name] = mean_zero
        result.loc['多头信号数量', factor_name] = number_positive
        result.loc['空头信号数量', factor_name] = number_negative
        result.loc['空仓信号数量', factor_name] = number_zero
        result.loc['多头胜率', factor_name] = positive_wining_ratio
        result.loc['多头盈亏比', factor_name] = positive_profit_loss_ratio
        result.loc['空头胜率', factor_name] = negative_wining_ratio
        result.loc['空头盈亏比', factor_name] = negative_profit_loss_ratio

        file = os.path.join(self.data_path, 'factor_backtest',
                            factor_name + '_Result.csv')
        result.to_csv(file)

        file = os.path.join(self.data_path, 'factor_backtest',
                            factor_name + '_Return.csv')
        data.to_csv(file)
    def cal_factor_exposure(self,
                            beg_date="20060101",
                            end_date=None):
        """Compute per-stock beta and residual return for each trading day.

        For every date between ``beg_date`` and ``end_date`` and every stock,
        the stock's return minus the risk-free rate is regressed (weighted
        least squares with an intercept) on the 000985.CSI return minus the
        risk-free rate over the trailing 252 trading days.  Stocks with 40 or
        fewer complete observations in the window are skipped.  The slope is
        stored as beta and the last residual as the day's residual return.

        Saved through ``self.save_risk_factor_exposure``:
            * raw beta under ``self.raw_factor_name``;
            * residual returns under ``self.raw_res_pct_factor_name``;
            * beta after ``remove_extreme_value_mad`` and
              ``standardization`` under ``self.factor_name``.

        Args:
            beg_date: first date, ``YYYYMMDD`` string.
            end_date: last date, ``YYYYMMDD`` string; ``None`` (the default)
                means today, resolved when the method is called.
        """
        if end_date is None:
            # Resolved here rather than in the signature: a default written
            # in the signature is evaluated once, at definition time.
            end_date = datetime.today().strftime("%Y%m%d")

        # params
        term = 252
        half_life = 63
        min_periods = 40

        # read data
        pct = Stock().read_factor_h5("Pct_chg")
        pct = Stock().replace_suspension_with_nan(pct)
        pct = Stock().fillna_with_mad_market(pct)
        pct = pct.T

        index_pct = Index().get_index_factor("000985.CSI", attr=['PCT']) * 100
        index_pct.columns = ['Index']
        risk_free_rate = Macro().get_daily_risk_free_rate()

        # Only dates present in every input are processed, in ascending order.
        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(
            set(pct.index) & set(date_series) & set(index_pct.index)
            & set(risk_free_rate.index))

        beta = pd.DataFrame([], columns=date_series, index=pct.columns)
        res_pct = pd.DataFrame([], columns=date_series, index=pct.columns)

        for current_date in date_series:

            print('Calculating Barra Risk factor %s at date %s' %
                  (self.factor_name, current_date))
            data_beg_date = Date().get_trade_date_offset(
                current_date, -(term - 1))
            pct_before = pct.loc[data_beg_date:current_date, :]
            index_pct_before = index_pct.loc[data_beg_date:current_date, :]
            risk_free_rate_before = risk_free_rate.loc[
                data_beg_date:current_date, :]

            for stock_code in pct_before.columns:

                pct_before_stock = pd.DataFrame(pct_before[stock_code])
                concat_data = pd.concat([
                    index_pct_before, risk_free_rate_before, pct_before_stock
                ],
                                        axis=1)
                concat_data = concat_data.dropna()

                # Not enough history: skip before doing any further work.
                if len(concat_data) <= min_periods:
                    continue

                # NOTE(review): presumably exponentially decaying weights
                # with the given half life - confirm in TimeSeriesWeight.
                weight = pd.DataFrame(TimeSeriesWeight().exponential_weight(
                    len(concat_data), half_life),
                                      index=concat_data.index,
                                      columns=['Weight'])
                concat_data['Weight'] = weight['Weight']
                concat_data['ones'] = 1.0
                # Turn both series into returns in excess of the risk-free rate.
                concat_data["Index"] -= concat_data["RiskFreeRate"]
                concat_data[stock_code] -= concat_data["RiskFreeRate"]

                x = concat_data[['ones', "Index"]].values
                y = concat_data[stock_code].values
                model = sm.WLS(y, x,
                               weights=concat_data['Weight'].values).fit()

                res_series = y - model.fittedvalues
                beta.loc[stock_code, current_date] = model.params[1]
                res_pct.loc[stock_code, current_date] = res_series[-1]
                print(stock_code, current_date, model.params[1],
                      res_series[-1])

        # save data (date columns that are entirely empty are dropped first)
        beta = beta.T.dropna(how='all').T
        res_pct = res_pct.T.dropna(how='all').T
        self.save_risk_factor_exposure(beta, self.raw_factor_name)
        self.save_risk_factor_exposure(res_pct, self.raw_res_pct_factor_name)
        beta = FactorPreProcess().remove_extreme_value_mad(beta)
        beta = FactorPreProcess().standardization(beta)
        self.save_risk_factor_exposure(beta, self.factor_name)
Beispiel #10
0
def _add_period_statistics(result, prefix, excess_pct, fund_nav, index_close):
    """Add the six ``prefix``-labelled statistic columns of one window to ``result``.

    Columns written (in this order, existing columns keep their position):
    data length, annualised tracking error, fund return, index return,
    excess return and information ratio.

    Args:
        result: DataFrame indexed by fund code; modified in place.
        prefix: Column-name prefix identifying the window.
        excess_pct: Daily fund-minus-index returns inside the window,
            one column per fund.
        fund_nav: Fund net asset values inside the window, one column per fund.
        index_close: Single-column frame of index closes inside the window.
    """
    result[prefix + "数据长度"] = excess_pct.count()
    # Daily standard deviation scaled by sqrt(250) to annualise it.
    result[prefix + "跟踪误差"] = excess_pct.std() * np.sqrt(250)

    # An empty window (e.g. `date` lies beyond the available data) yields NaN
    # instead of an IndexError from the positional `iloc[-1]` lookup.
    if len(fund_nav) > 0:
        # Compound the daily returns in log space and read the last row.
        log_growth = np.log(fund_nav.pct_change() + 1.0).cumsum().iloc[-1, :]
        result[prefix + "基金涨跌"] = np.exp(log_growth) - 1
    else:
        result[prefix + "基金涨跌"] = np.nan

    if len(index_close) > 0:
        first_close = index_close.iloc[0, :]
        last_close = index_close.iloc[-1, :]
        result[prefix + "指数涨跌"] = (last_close / first_close - 1.0).values[0]
    else:
        result[prefix + "指数涨跌"] = np.nan

    result[prefix + "超额收益"] = result[prefix + "基金涨跌"] - result[prefix + "指数涨跌"]
    result[prefix + "信息比率"] = result[prefix + "超额收益"] / result[prefix + "跟踪误差"]


def calculate_fund_factor_date(date, index_code, index_name, out_path):
    """Compute fund-vs-index statistics around ``date`` and export them to Excel.

    The fund pool is read from
    ``<out_path>/filter_fund_pool/基金最终筛选池_<index_name>.xlsx``. Funds listed
    less than one year before ``date`` are dropped, and total net assets are
    requested through ``w.wss``. For the year before ``date`` and the six
    months after it, the function computes data length, annualised tracking
    error, fund return, index return, excess return and information ratio.
    Funds need more than 200 valid observations in the trailing year. Only
    funds whose net assets divided by 1e8 exceed 0.5 appear in the output.

    The table is written with ``write_pandas`` to
    ``<out_path>/cal_fund_factor/<index_name>/基金指标_<index_name>_<date>.xlsx``.

    Args:
        date: Evaluation date as a ``'YYYYMMDD'`` string.
        index_code: Benchmark index code passed to ``Index().get_index_factor``.
        index_name: Index name used in the input and output file names.
        out_path: Base directory holding the input and output folders.

    Returns:
        True once the workbook has been written.

    Raises:
        ValueError: If ``date`` does not match ``'%Y%m%d'``.
    """
    # Dates
    min_period = 200
    date_cur = datetime.strptime(date, "%Y%m%d")
    date_cur_int = date_cur.strftime('%Y%m%d')
    # DateOffset clamps Feb 29 to Feb 28; building datetime(year - 1, month, day)
    # by hand raised ValueError for leap days.
    date_bef_1y = (date_cur - pd.tseries.offsets.DateOffset(years=1)).strftime("%Y-%m-%d")
    date_aft_hy = (date_cur + pd.tseries.offsets.DateOffset(months=6, days=0)).strftime("%Y-%m-%d")
    # NOTE(review): `date` ('YYYYMMDD') and the derived bounds ('YYYY-MM-DD') are
    # mixed in the label slices below; confirm the NAV/index row labels compare
    # correctly against both spellings.

    # Load the fund pool, fund NAV and index close
    pool_file = os.path.join(out_path, 'filter_fund_pool',
                             '基金最终筛选池_' + index_name + '.xlsx')
    # The former `encoding='gbk'` argument is dropped: current pandas
    # `read_excel` does not accept it.
    fund_code = pd.read_excel(pool_file, index_col=[1])

    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, None, None, attr=['CLOSE'])
    index_close.columns = [index_code]

    # Drop recently listed funds and fetch fund size
    fund_code = fund_code.loc[:, ['上市日期', '基金简称']]
    fund_code = fund_code[fund_code['上市日期'] < date_bef_1y]

    fund_code_str = ','.join(fund_code.index)
    asset_raw = w.wss(fund_code_str, "netasset_total", "unit=1;tradeDate=" + str(date))
    fund_asset = pd.DataFrame(asset_raw.Data, index=['基金规模'], columns=asset_raw.Codes).T
    fund_asset['基金规模'] /= 100000000.0
    fund_asset['基金规模'] = fund_asset['基金规模'].round(2)
    fund_asset = fund_asset[fund_asset['基金规模'] > 0.5]
    fund_info = pd.concat([fund_code, fund_asset], axis=1)
    fund_info = fund_info.dropna()

    # Daily returns aligned to the index calendar
    result = pd.DataFrame([], index=fund_code.index, columns=['最近1年跟踪误差'])
    # reindex keeps the NaN-filling behaviour `.ix` had for labels that are
    # missing from the NAV table.
    fund_nav = fund_nav.reindex(index=index_close.index, columns=fund_code.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')

    # Trailing one-year statistics
    _add_period_statistics(result, "最近1年",
                           fund_excess_pct.loc[date_bef_1y:date, :],
                           fund_nav.loc[date_bef_1y:date, :],
                           index_close.loc[date_bef_1y:date, :])

    # Copy so the following column assignments do not write into a slice.
    result = result[result['最近1年数据长度'] > min_period].copy()

    # Following half-year statistics
    _add_period_statistics(result, "之后半年",
                           fund_excess_pct.loc[date:date_aft_hy, :],
                           fund_nav.loc[date:date_aft_hy, :],
                           index_close.loc[date:date_aft_hy, :])

    result = pd.concat([fund_info, result], axis=1)
    result = result.dropna(subset=["基金规模"])
    result = result.fillna("")

    # Write the Excel workbook
    out_path = os.path.join(out_path, "cal_fund_factor", index_name)

    # Percent format by default; counts, sizes and ratios use plain numbers.
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', '之后半年数据长度'] = '0.00'
    num_format_pd.loc['format', '之后半年信息比率'] = '0.00'
    num_format_pd.loc['format', '基金规模'] = '0.00'
    num_format_pd.loc['format', '最近1年信息比率'] = '0.00'
    num_format_pd.loc['format', '最近1年数据长度'] = '0.00'

    begin_row_number = 0
    begin_col_number = 0
    color = "red"
    file_name = os.path.join(out_path, '基金指标_' + index_name + '_' + date_cur_int + '.xlsx')
    sheet_name = "基金指标"
    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color)

    return True
Beispiel #11
0
def filter_fund_pool(index_code, begin_date, end_date,
                     min_period, ipo_date, track_error_up,
                     index_name, out_path):
    """Screen funds that track ``index_code`` closely and write the pool to Excel.

    For every fund in the ``Repair_Nav`` table, the annualised tracking error
    against the index and the number of valid daily observations are computed
    over ``begin_date``..``end_date``. A fund is kept when it passes all of
    these filters:

    * more than ``min_period`` valid observations,
    * tracking error below ``track_error_up``,
    * benchmark text (from ``w.wss``) contains ``index_name``,
    * set-up date earlier than ``ipo_date``,
    * full name contains neither "交易型开放式指数" nor "联接".

    The result is written with ``write_pandas`` to
    ``<out_path>/filter_fund_pool/基金初次筛选池_<index_name>.xlsx``.

    Args:
        index_code: Benchmark index code passed to ``Index().get_index_factor``.
        begin_date: First row label of the evaluation window.
        end_date: Last row label of the evaluation window.
        min_period: Exclusive lower bound on valid observations.
        ipo_date: Exclusive upper bound on the set-up date; it is compared with
            ``'%Y-%m-%d'`` strings, so it should use that format.
        track_error_up: Exclusive upper bound on the annualised tracking error.
        index_name: Index name searched for in the benchmark text and used in
            the output file name.
        out_path: Base output directory.

    Returns:
        None.
    """
    # Load data
    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, None, None, attr=['CLOSE'])
    index_close.columns = [index_code]

    result = pd.DataFrame([], index=fund_nav.columns, columns=['最近1年跟踪误差', '有效数据长度'])

    # Tracking error over the evaluation window
    # reindex keeps the NaN-filling behaviour `.ix` had for index dates that
    # are missing from the NAV table.
    fund_nav = fund_nav.reindex(index=index_close.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')
    fund_excess_pct_period = fund_excess_pct.loc[begin_date:end_date]
    result["有效数据长度"] = fund_excess_pct_period.count()
    # Daily standard deviation scaled by sqrt(250) to annualise it.
    result["最近1年跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Numeric filters
    result = result.dropna()
    result = result[result['有效数据长度'] > min_period]
    result = result[result['最近1年跟踪误差'] < track_error_up]

    # Descriptive fields for the surviving funds
    code_str = ','.join(result.index)
    data = w.wss(code_str, "fund_benchmark,fund_fullname,fund_setupdate,fund_investtype")
    data_pd = pd.DataFrame(data.Data, index=data.Fields, columns=data.Codes).T
    data_pd.columns = ['基金基准', '基金全称', '上市日期', '基金类型']
    data_pd['上市日期'] = data_pd['上市日期'].map(lambda x: x.strftime('%Y-%m-%d'))
    result = pd.concat([data_pd, result], axis=1)

    # Text and date filters
    result = result[result["基金基准"].map(lambda x: index_name in x)]
    result = result[result["上市日期"] < ipo_date]
    result = result[result["基金全称"].map(lambda x: "交易型开放式指数" not in x)]
    result = result[result["基金全称"].map(lambda x: "联接" not in x)]

    # Output
    out_path = os.path.join(out_path, "filter_fund_pool")
    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'

    begin_row_number = 0
    begin_col_number = 1
    color = "red"
    file_name = os.path.join(out_path, '基金初次筛选池_' + index_name + '.xlsx')
    sheet_name = "基金筛选池"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number, result, num_format_pd, color)
Beispiel #12
0
def _share_of_window_above_start(values, window):
    """Return, per start row, the share of a forward window exceeding the start value.

    The window of row ``i`` is rows ``i .. i + window - 1``; it contains row
    ``i`` itself, which never exceeds its own value. Only the first
    ``len(values) - window`` rows are filled; the rest stay NaN.

    Args:
        values: One-dimensional numeric sequence.
        window: Number of rows in each forward window.

    Returns:
        numpy float array with the same length as ``values``.
    """
    values = np.asarray(values, dtype=float)
    shares = np.full(len(values), np.nan)
    for start in range(len(values) - window):
        above = np.count_nonzero(values[start:start + window] > values[start])
        shares[start] = above / float(window)
    return shares


def Index_Group_Statistics(out_path, index_code, value_factor, group_number):
    """Group an index's history by valuation quantile and summarise forward returns.

    Daily ``PCT`` and ``value_factor`` series are read through
    ``Index().get_index_factor``. For each day the function computes forward
    1/3/5-year log returns (242 rows per year), whether they are positive, the
    percentile of the valuation over the whole sample, and the share of the
    forward window in which the valuation is above its starting value. The
    days are then cut into ``group_number`` valuation quantiles and, per
    quantile and horizon, the count, the median forward return, the share of
    positive returns and the median "above start" share are reported.

    Two files are written:
    ``<out_path><index_code>_原始数据.csv`` (daily table) and
    ``<out_path><index_code>_收益中位数.xlsx`` (grouped table, via
    ``write_pandas``). ``out_path`` is concatenated as-is, so it must end with
    a path separator.

    Row positions are used throughout; this matches the original positional
    lookups, which presuppose a non-integer (date-like) row index.

    Args:
        out_path: Output directory prefix, including the trailing separator.
        index_code: Index code passed to ``Index().get_index_factor``.
        value_factor: Name of the valuation field, e.g. ``'PE_TTM'``.
        group_number: Number of quantile groups.

    Returns:
        None.
    """
    # 1. Raw data preparation
    data = Index().get_index_factor(index_code, None, None,
                                    ['PCT', value_factor])
    data = data.dropna(subset=[value_factor])
    data.columns = ['pct', 'pe']
    data['pe'] = data['pe'].round(2)
    year_number = 242
    horizons = (('1y', year_number),
                ('3y', year_number * 3),
                ('5y', year_number * 5))

    # 1.2 Daily log return and cumulative return
    data['ln_pct'] = np.log(data['pct'] + 1)
    data['cum_sum_pct'] = data['ln_pct'].cumsum().map(lambda x: np.exp(x) - 1)

    # 1.3 Forward log return: sum of the next `window` daily log returns
    for label, window in horizons:
        data['return_' + label] = data['ln_pct'].rolling(
            window=window).sum().shift(-window)

    # 1.4 Whether the forward return is positive
    for label, _ in horizons:
        data['if_zero_' + label] = data['return_' + label] > 0.0

    # 1.5 Percentile of the valuation within the whole sample (0-100, rounded)
    data['rank'] = data['pe'].rank() / len(data)
    data['rank'] *= 100
    data['rank'] = data['rank'].round(0)

    # 1.7-1.9 Share of the forward window with valuation above its start value.
    # The columns are always created, so a history shorter than a horizon gives
    # an all-NaN column instead of a KeyError in the group statistics below.
    pe_values = data['pe'].values
    for label, window in horizons:
        data['if_' + label + '_ratio'] = _share_of_window_above_start(
            pe_values, window)

    data['pe_cut'] = pd.qcut(data['pe'], group_number)
    data.to_csv(out_path + index_code + '_原始数据.csv')

    # 2. Group statistics per valuation quantile
    counts, medians, positive_shares, above_start_medians = [], [], [], []
    for label, _ in horizons:
        valid = data.dropna(subset=['return_' + label])
        # observed=False keeps every quantile bin, so all results share one index.
        by_bin = valid.groupby('pe_cut', observed=False)
        counts.append(by_bin['return_' + label].count())
        medians.append(by_bin['return_' + label].median())
        positive_shares.append(by_bin['if_zero_' + label].sum() /
                               by_bin['if_zero_' + label].count())
        # Median for every horizon: the 1y figure used to be a mean although
        # its output column is labelled as a median like the 3y/5y ones.
        above_start_medians.append(by_bin['if_' + label + '_ratio'].median())

    # Percentile range matching each valuation bin; both are quantile cuts of
    # monotonically related series, paired by position.
    rank_bins = pd.qcut(data['rank'], group_number).cat.categories
    return_pe_rank = pd.DataFrame(np.asarray(rank_bins),
                                  index=medians[0].index,
                                  columns=['历史百分位数'])
    return_pe_rank['开始时间'] = data.index[0]
    return_pe_rank['结束时间'] = data.index[-1]
    return_pe_rank['当前PE'] = data['pe'].iloc[-1]

    # Output
    res = pd.concat([return_pe_rank] + counts + medians + positive_shares +
                    above_start_medians,
                    axis=1)
    res.columns = [
        "PE百分位范围", '开始时间', '结束时间', '当前PE', '有效数据个数_1y', '有效数据个数_3y',
        '有效数据个数_5y', '收益中位数_1y', '收益中位数_3y', '收益中位数_5y', '收益大于0的比例_1y',
        '收益大于0的比例_3y', '收益大于0的比例_5y', '超过初始PE天数的比例的中位数_1y',
        '超过初始PE天数的比例的中位数_3y', '超过初始PE天数的比例的中位数_5y'
    ]

    # Replace the categorical index by its plain interval labels.
    res.index = np.asarray(res.index)
    res.index.name = "PE绝对值范围"

    num_format_pd = pd.DataFrame([], columns=res.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc[
        'format',
        ['开始时间', '结束时间', '当前PE', '有效数据个数_1y', '有效数据个数_3y', '有效数据个数_5y'
         ]] = '0.0'

    begin_row_number = 0
    begin_col_number = 1
    color = "red"
    file_name = out_path + index_code + '_收益中位数.xlsx'
    sheet_name = "收益中位数"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number,
                 res, num_format_pd, color)