コード例 #1
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute the GEM dummy exposure for every trade date in
        [beg_date, end_date] and save it as a Barra risk factor.

        For each trade date, a stock is kept only if it listed more than
        ``new_stock_days`` trade days before that date and has not yet been
        delisted; the kept stocks are mapped through
        ``CodeFormat().get_gem_stock`` to produce the exposure column.

        :param beg_date: first date of the window (date string)
        :param end_date: last date of the window (date string)
        """

        new_stock_days = 60

        data = Stock().get_ipo_date()
        data.columns = ['ipo', 'delist']
        data = data.astype(str)
        date_series = Date().get_trade_date_series(beg_date, end_date)

        # Collect one column per date and concatenate once at the end;
        # the original pd.concat inside the loop was quadratic in the
        # number of dates.
        date_columns = []

        for date in date_series:

            print('Calculating Barra Risk factor %s at date %s' %
                  (self.raw_factor_name, date))
            new_stock_date = Date().get_trade_date_offset(
                date, -new_stock_days)
            # .copy() so adding the 'GEM' column below does not raise a
            # SettingWithCopyWarning on the filtered view.
            data_date = data[(data['ipo'] < new_stock_date)
                             & (data['delist'] > date)].copy()
            data_date['GEM'] = data_date.index.map(CodeFormat().get_gem_stock)
            res_date = pd.DataFrame(data_date['GEM'])
            res_date.columns = [date]
            date_columns.append(res_date)

        # Empty window → empty frame, matching the original's behavior.
        res = pd.concat(date_columns, axis=1) if date_columns else pd.DataFrame()

        self.save_risk_factor_exposure(res, self.raw_factor_name)
コード例 #2
0
def cal_ipo_sell():
    """
    Build sell-order files for recently IPO'd stocks held by each fund and
    upload them to the FTP server.

    Holdings are filtered to stocks that:
      * are of security type '股票' (equity),
      * listed within the last ``new_days`` calendar days,
      * have real-time volume > 0 (i.e. have started trading), and
      * have a real-time pct change < 0.09 (not sealed at the up limit).

    One ``.xls`` order file is written per fund into a dated local folder
    and mirrored to ``\\ipo_stock\\<today>`` on the FTP server.
    """

    out_path = 'E:\\3_Data\\7_other_data\\4_cal_ipo_sell\\'
    new_days = 60

    today = datetime.today().strftime("%Y%m%d")

    Date().load_trade_date_series("D")
    # MfcData().load_ftp_daily(date=today)
    # MfcData().change_ftp_file(date=today)

    before_trade_data = Date().get_trade_date_offset(today, -1)

    # Previous trade day's holdings, narrowed to equities.
    data = MfcData().get_fund_security(before_trade_data)
    data = data.dropna(subset=['基金名称'])
    data = data[['基金名称', '证券代码', '持仓', '证券类别']]
    data.columns = ['FundName', 'StockCode', 'Holding', 'Type']
    data = data[data.Type == '股票']
    data.StockCode = data.StockCode.map(stock_code_add_postfix)
    data["Market"] = data.StockCode.map(get_stcok_market)

    # IPO dates keyed by stock code.
    Stock().load_all_stock_code_now()
    Stock().load_ipo_date()
    stock = Stock().get_ipo_date()
    stock.columns = ['IpoDate', 'DelistDate']
    stock['StockCode'] = stock.index
    stock['IpoDate'] = stock['IpoDate'].map(lambda x: str(int(x)))

    # Cutoff: stocks that listed within the last `new_days` calendar days.
    new_stock_date = datetime.today() - timedelta(days=new_days)
    new_stock_date = new_stock_date.strftime("%Y%m%d")

    all_data = pd.merge(data, stock, on=['StockCode'], how="left")
    all_data = all_data[all_data.IpoDate > new_stock_date]

    # Real-time pct change and volume from Wind for the candidate codes.
    code_list = list(set(all_data.StockCode.values))
    code_str = ','.join(code_list)
    pct = w.wsq(code_str, "rt_pct_chg,rt_vol")
    pct = pd.DataFrame(pct.Data, columns=pct.Codes, index=['Pct', 'Vol']).T
    pct['StockCode'] = pct.index

    # Keep only stocks that are trading and not at the up limit.
    new_data = pd.merge(all_data, pct, on=['StockCode'], how="left")
    new_data = new_data[new_data['Vol'] > 0]
    new_data = new_data[new_data['Pct'] < 0.09]

    # Create the dated output folder once; the original re-checked it on
    # every loop iteration.
    out_sub_path = os.path.join(out_path, today)
    if not os.path.exists(out_sub_path):
        os.mkdir(out_sub_path)
        print(" Make Folder At ", today)

    fund_list = list(set(data['FundName']))

    for fund_name in fund_list:

        fund_data = new_data[new_data.FundName == fund_name]

        if len(fund_data) == 0:
            continue

        # .copy() so the column assignments below do not trigger a
        # SettingWithCopyWarning (the original suppressed all warnings
        # globally instead).
        fund_data_out = fund_data[['StockCode', 'Holding', 'Market']].copy()
        fund_data_out.columns = ['Ticker', 'Shares', 'Market']
        fund_data_out['Direction'] = 2
        fund_data_out['Price'] = 0
        fund_data_out['Market Code'] = fund_data_out['Market'].map(
            lambda x: 1 if x == 'SH' else 2)
        fund_data_out['Price Model'] = 4
        fund_data_out['Ticker'] = fund_data_out['Ticker'].map(
            lambda x: x[0:6])

        fund_data_out = fund_data_out[[
            'Ticker', 'Direction', 'Shares', 'Price', 'Price Model',
            'Market Code'
        ]]

        file = fund_name + '.xls'
        out_file = os.path.join(out_sub_path, file)
        print(out_file)
        fund_data_out.to_excel(out_file, index=None)
        ftp = MyFtp()
        ftp.connect()
        ftp_file = os.path.join("\\ipo_stock\\", today, file)
        ftp.upload_file(ftp_file, out_file)
        ftp.close()
コード例 #3
0
    def ipo_sell(self, today=None):
        """
        Check, for every fund group, whether recently IPO'd holdings have
        "opened" (trading and off the up limit) and export one sell-order
        file per manager, both locally and to the FTP server.

        :param today: trade date as 'YYYYMMDD'. Defaults to the current
                      date *at call time* — the original evaluated
                      ``datetime.today()`` once at definition time, which
                      freezes the default for the life of the process.
        """

        if today is None:
            today = datetime.today().strftime("%Y%m%d")

        before_trade_data = Date().get_trade_date_offset(today, -1)

        # get holding data
        data = MfcData().get_group_security(before_trade_data)
        data = data.dropna(subset=['基金名称'])
        data = data[['基金名称', '基金编号', '组合名称', '组合编号', '证券代码', '持仓', '证券类别']]
        # Flat list here: the original wrapped the names in an extra list,
        # which made pandas build an accidental one-level MultiIndex.
        data.columns = ['基金名称', '基金编号(序号)', '组合名称', '组合编号', '证券代码', '指令数量', '证券类别']

        data = data[data['证券类别'] == '股票']
        data['证券代码'] = data['证券代码'].map(CodeFormat().stock_code_add_postfix)
        data["交易市场内部编号"] = data['证券代码'].map(CodeFormat().get_stcok_market)

        fund_info = MfcData().get_mfc_fund_info()
        fund_info = fund_info.dropna(subset=['FundId'])
        # Call through an instance like every other CodeFormat use in this
        # method (the original accessed the method on the class itself).
        fund_info['FundId'] = fund_info['FundId'].map(CodeFormat().stock_code_add_postfix)
        fund_info['FundId'] = fund_info['FundId'].map(lambda x: x[0:6])

        # get ipo data
        Stock().load_all_stock_code_now()
        Stock().load_ipo_date()
        stock = Stock().get_ipo_date()
        stock.columns = ['IpoDate', 'DelistDate']
        stock['证券代码'] = stock.index
        stock['IpoDate'] = stock['IpoDate'].map(lambda x: str(int(x)))

        # keep only stocks that listed within the last `new_days` days
        new_stock_date = datetime.today() - timedelta(days=self.new_days)
        new_stock_date = new_stock_date.strftime("%Y%m%d")
        all_data = pd.merge(data, stock, on=['证券代码'], how="left")
        all_data = all_data[all_data.IpoDate > new_stock_date]

        # real-time pct change and volume of the new stocks (Wind wsq)
        code_list = list(set(all_data['证券代码'].values))
        code_str = ','.join(code_list)
        pct = w.wsq(code_str, "rt_pct_chg,rt_vol")
        pct = pd.DataFrame(pct.Data, columns=pct.Codes, index=['Pct', 'Vol']).T
        pct['证券代码'] = pct.index

        # keep stocks that are trading and not sealed at the up limit
        new_data = pd.merge(all_data, pct, on=['证券代码'], how="left")
        new_data = new_data[new_data['Vol'] > 0]
        new_data = new_data[new_data['Pct'] < 0.09]

        # new local folder for today's order files
        out_sub_path = os.path.join(self.data_path, today)
        if not os.path.exists(out_sub_path):
            os.mkdir(out_sub_path)

        # matching folder on the FTP server
        ftp = MyFtp()
        ftp.connect()
        ftp_folder = os.path.join(self.ftp_path, today)
        ftp.upload_folder(ftp_folder)
        ftp.close()

        # manager -> funds mapping; one output file per manager
        manager_data = MfcData().get_manager_fund()

        for manager_name in manager_data.columns:

            manager_fund = manager_data[manager_name]
            manager_fund = manager_fund.dropna()

            fund_data = new_data[new_data['基金名称'].map(lambda x: x in manager_fund.values)]

            if len(fund_data) == 0:
                continue

            warnings.filterwarnings("ignore")

            # .copy() so the column assignments below do not warn on the
            # filtered view.
            fund_data_out = fund_data[['证券代码', '指令数量', '交易市场内部编号',
                                       '基金编号(序号)', '基金名称', '组合编号']].copy()
            fund_data_out['委托方向'] = 2
            fund_data_out['指令价格'] = 0
            fund_data_out['交易市场内部编号'] = fund_data_out['交易市场内部编号'].map(lambda x: 1 if x == 'SH' else 2)
            fund_data_out['价格模式'] = ""
            fund_data_out['当前指令市值/净值(%)'] = ""
            fund_data_out['目标市值/净值(%)'] = ""
            fund_data_out['基金名称'] = ""
            fund_data_out['证券代码'] = fund_data_out['证券代码'].map(lambda x: x[0:6])
            fund_data_out = fund_data_out[['证券代码', '委托方向', '指令数量', '指令价格',
                                           '价格模式', '交易市场内部编号', '当前指令市值/净值(%)', '目标市值/净值(%)',
                                           '基金编号(序号)', '基金名称', '组合编号']]

            file = manager_name + '.xls'
            out_file = os.path.join(out_sub_path, file)
            print(out_file)
            fund_data_out.to_excel(out_file, index=None)

            ftp = MyFtp()
            ftp.connect()
            ftp_file = os.path.join(self.ftp_path, today, file)
            ftp.upload_file(ftp_file, out_file)
            ftp.close()
コード例 #4
0
    def backtest_alpha_factor(self, factor_name):
        """
        Back-test a single alpha factor: for each rebalance date compute
        IC / rank IC, the long-short factor return, decile group returns
        and long-top / short-bottom portfolio returns, then write the
        per-date results, group returns and a summary to CSV files.

        :param factor_name: name of the alpha factor stored in the h5 data
        """

        # Parameters
        ####################################################################################################
        ipo_min_days = 90
        # factor_name = 'ROERankYOY'
        input_stock_pool = None
        input_backtest_date_series = None
        tradedays_yearly = 250
        transaction_cost = 0.0008
        stamp_tax = 0.001
        min_trade_volume = 0.0
        min_free_mv = 0.0
        need_alpha_norm_inv = True
        lead_lag_length = 50
        group_number = 10
        beg_date = "20040101"
        end_date = "20181001"
        backtest_period = "W"
        backtest_period_days = 5
        path = r'E:\3_Data\5_stock_data\3_alpha_model\backtest_alpha'
        ####################################################################################################

        # Required data
        ####################################################################################################
        # Fix: the h5 sub-path was ",y_alpha" (typo) — "my_alpha" matches the
        # path used for "PctRes" below.
        alpha_factor = Stock().read_factor_h5(factor_name,
                                              Stock().get_h5_path("my_alpha"))
        alpha_post = factor_name[-3:]
        alpha_factor_date_series = list(alpha_factor.columns)
        if need_alpha_norm_inv:
            alpha_factor = FactorPreProcess().inv_normalization(alpha_factor)

        trade_status = Stock().read_factor_h5("TradingStatus")
        trade_status_date_series = list(trade_status.columns)

        # Factors named "...Res" use residual returns instead of raw returns.
        if alpha_post != 'Res':
            stock_pct = Stock().read_factor_h5("Pct_chg")
            stock_pct_date_series = list(stock_pct.columns)
        else:
            stock_pct = Stock().read_factor_h5("PctRes",
                                               Stock().get_h5_path("my_alpha"))
            stock_pct_date_series = list(stock_pct.columns)

        price_adjust = Stock().read_factor_h5("PriceCloseAdjust")
        price_adjust_date_series = list(price_adjust.columns)

        trade_volume = Stock().read_factor_h5("TradeVolumn")
        trade_volume_date_series = list(trade_volume.columns)

        free_mv = Stock().read_factor_h5("Mkt_freeshares")
        free_mv_date_series = list(free_mv.columns)

        ipo_days = Stock().get_ipo_date()
        ipo_days.columns = ['IpoDate', 'DelistDate']

        # Back-test dates: intersection of the requested window with the
        # range where every data source is available.
        ####################################################################################################
        bt_beg_date = max(beg_date, trade_status_date_series[0],
                          stock_pct_date_series[0],
                          alpha_factor_date_series[0],
                          price_adjust_date_series[0],
                          trade_volume_date_series[0], free_mv_date_series[0])
        bt_end_date = min(end_date, trade_status_date_series[-1],
                          stock_pct_date_series[-1],
                          alpha_factor_date_series[-1],
                          price_adjust_date_series[-1],
                          trade_volume_date_series[-1],
                          free_mv_date_series[-1])

        if input_backtest_date_series is None:
            backtest_date_series = Date().get_trade_date_series(
                bt_beg_date, bt_end_date, backtest_period)
        else:
            backtest_date_series = Date().get_trade_date_series(
                bt_beg_date, bt_end_date, "D")
            backtest_date_series = list(
                set(input_backtest_date_series) & set(backtest_date_series))
            backtest_date_series.sort()

        backtest_date_series = set(trade_status_date_series) & set(stock_pct_date_series) & \
                               set(alpha_factor_date_series) & set(price_adjust_date_series) & \
                               set(backtest_date_series) & set(trade_volume_date_series) & set(free_mv_date_series)
        backtest_date_series = list(backtest_date_series)
        backtest_date_series.sort()
        ####################################################################################################

        # Daily back-test loop
        ####################################################################################################
        result = pd.DataFrame([],
                              columns=['ValDate', "BuyDate", "SellDate"],
                              index=backtest_date_series)
        lag_result = pd.DataFrame([], index=backtest_date_series)
        labels = ["Gp_" + str(x) for x in range(1, group_number + 1)]
        group_result = pd.DataFrame([],
                                    columns=labels,
                                    index=backtest_date_series)

        for i_date in range(0, len(backtest_date_series) - 1):

            # Dates: alpha observed at alpha_date, position entered the next
            # trade day, exited the trade day after the next alpha date.
            ##############################################################################
            alpha_date = backtest_date_series[i_date]
            trade_date = Date().get_trade_date_offset(alpha_date, 1)
            next_alpha_date = backtest_date_series[i_date + 1]
            next_trade_date = Date().get_trade_date_offset(next_alpha_date, 1)
            print("BackTest Stock Alpha At %s" % alpha_date)
            ##############################################################################

            # Merge data for the current period
            ##############################################################################
            alpha_factor_date = pd.DataFrame(alpha_factor[alpha_date])
            alpha_factor_date.columns = ['Alpha']

            next_alpha_factor_date = pd.DataFrame(
                alpha_factor[next_alpha_date])
            next_alpha_factor_date.columns = ['NextAlpha']

            trade_status_date = pd.DataFrame(trade_status[trade_date])
            trade_status_date.columns = ['Status']

            price_adjust_date = pd.DataFrame(price_adjust[trade_date])
            price_adjust_date.columns = ['Price']

            next_price_adjust_date = pd.DataFrame(
                price_adjust[next_trade_date])
            next_price_adjust_date.columns = ['NextPrice']

            all_data = pd.concat([
                alpha_factor_date, next_alpha_factor_date, trade_status_date,
                price_adjust_date, next_price_adjust_date, ipo_days
            ],
                                 axis=1)
            all_data = all_data.dropna()
            ##############################################################################

            # Stock pool:
            # - drop stocks that cannot be traded
            # - drop recent IPOs (could also drop small free-float / low-volume names)
            # - optionally intersect with an external stock pool
            ##############################################################################
            can_trade_code = all_data['Status'].map(lambda x: x in [0, 1])
            all_data = all_data.loc[can_trade_code, :]
            the_ipo_date = Date().get_trade_date_offset(
                alpha_date, -ipo_min_days)
            all_data = all_data.loc[all_data['IpoDate'] < the_ipo_date, :]
            all_data = all_data.dropna()

            if input_stock_pool is None:
                stock_pool = list(all_data.index)
                stock_pool.sort()
            else:
                stock_pool = list(
                    set(input_stock_pool) & set(list(all_data.index)))
                stock_pool.sort()

            all_data = all_data.loc[stock_pool, :]
            all_data['Pct'] = all_data['NextPrice'] / all_data['Price'] - 1.0
            ##############################################################################

            # Factor lead/lag analysis (disabled)
            ##############################################################################
            # for i in np.arange(-lead_lag_length, lead_lag_length):
            #
            #     lag_alpha_date = Date().get_trade_date_offset(alpha_date, -i)
            #
            #     if lag_alpha_date in exposure.columns:
            #         alpha_factor_date = pd.DataFrame(exposure[lag_alpha_date])
            #         alpha_factor_date.columns = ['Alpha']
            #
            #         lag_all_data = all_data.copy()
            #         lag_all_data['Alpha'] = alpha_factor_date.loc[lag_all_data.index, 'Alpha']
            #
            #         lag_all_data['AlphaStand'] = lag_all_data['Alpha'] - lag_all_data['Alpha'].mean()
            #         lag_all_data['AlphaStand'] /= lag_all_data['Alpha'].std()
            #         lag_all_data['Weight'] = lag_all_data['AlphaStand'] / lag_all_data['AlphaStand'].abs().sum()
            #         ls_factor_return = (lag_all_data['Weight'] * lag_all_data['Pct']).sum()
            #         lag_result.loc[alpha_date, "Lag_" + str(i)] = ls_factor_return
            #     else:
            #         lag_result.loc[alpha_date, "Lag_" + str(i)] = np.nan
            ##############################################################################

            # Factor return, IC, rank IC, factor autocorrelation
            ##############################################################################
            ic = all_data['Pct'].corr(all_data['Alpha'])
            rank_ic = all_data['Pct'].corr(all_data['Alpha'],
                                           method='spearman')
            all_data['AlphaStand'] = (
                all_data['Alpha'] -
                all_data['Alpha'].mean()) / all_data['Alpha'].std()
            all_data['Weight'] = all_data['AlphaStand'] / all_data[
                'AlphaStand'].abs().sum()
            ls_factor_return = (all_data['Weight'] * all_data['Pct']).sum()
            port_alpha_exposure = (all_data['AlphaStand'] *
                                   all_data['AlphaStand']).sum()
            ls_factor_return_2 = (all_data['AlphaStand'] *
                                  all_data['Pct']).sum() / port_alpha_exposure
            auto_rank_corr = all_data['NextAlpha'].corr(all_data['Alpha'],
                                                        method='spearman')
            ##############################################################################

            # Group (decile) returns relative to the universe mean
            ##############################################################################
            all_data_sort = all_data.sort_values(by=['Alpha'], ascending=False)
            labels = ["Gp_" + str(x) for x in range(1, group_number + 1)]
            all_data_sort['Gp'] = pd.qcut(all_data_sort['Alpha'],
                                          q=group_number,
                                          labels=labels)
            all_mean = all_data_sort['Pct'].mean()
            group_result.loc[alpha_date, labels] = all_data_sort.groupby(
                by=['Gp'])['Pct'].mean() - all_mean
            ##############################################################################

            # LongTopShortOtherReturn: long the top decile, short the rest
            ##############################################################################
            all_data_sort = all_data.sort_values(by=['Alpha'], ascending=False)
            top_end_index = int(len(all_data) / group_number)
            all_data_sort['Score'] = 0.0
            all_data_sort.loc[all_data_sort.index[0:top_end_index],
                              "Score"] = 1.0
            all_data_sort['Score'] -= all_data_sort['Score'].mean()
            all_data_sort['Score'] /= all_data_sort['Score'].abs().sum()
            long_top_short_other_return = (all_data_sort['Score'] *
                                           all_data_sort['Pct']).sum()
            ##############################################################################

            # ShortBottomLongOtherReturn: short the bottom decile, long the rest
            ##############################################################################
            all_data_sort = all_data.sort_values(by=['Alpha'], ascending=True)
            top_end_index = int(len(all_data) / group_number)
            all_data_sort['Score'] = 0.0
            all_data_sort.loc[all_data_sort.index[0:top_end_index],
                              "Score"] = -1.0
            all_data_sort['Score'] -= all_data_sort['Score'].mean()
            all_data_sort['Score'] /= all_data_sort['Score'].abs().sum()
            short_bottom_long_other_return = (all_data_sort['Score'] *
                                              all_data_sort['Pct']).sum()
            ##############################################################################

            # Write this period's row into Result
            ##############################################################################
            result.loc[alpha_date, "ValDate"] = alpha_date
            result.loc[alpha_date, "BuyDate"] = trade_date
            result.loc[alpha_date, "SellDate"] = next_trade_date
            result.loc[alpha_date, "RankIC"] = rank_ic
            result.loc[alpha_date, "IC"] = ic
            result.loc[alpha_date, "LSFactorReturn"] = ls_factor_return
            result.loc[alpha_date, 'AutoRankCorr'] = auto_rank_corr
            result.loc[alpha_date, 'StockNumber'] = len(all_data)
            result.loc[alpha_date, 'StdPct'] = all_data['Pct'].std()
            result.loc[
                alpha_date,
                'ShortBottomLongOtherReturn'] = short_bottom_long_other_return
            result.loc[alpha_date,
                       'LongTopShortOtherReturn'] = long_top_short_other_return
            # LSFactorReturn = IC*std(AlphaStand)*std(StdPct)*(N-1)
            ##############################################################################

        # Loop finished — write output files
        ####################################################################################################
        result['CumLSFactorReturn'] = result["LSFactorReturn"].cumsum()
        result['CumRankIC'] = result["RankIC"].cumsum()
        result['CumShortBottomLongOtherReturn'] = result[
            "ShortBottomLongOtherReturn"].cumsum()
        result['CumLongTopShortOtherReturn'] = result[
            "LongTopShortOtherReturn"].cumsum()

        group_result_cumsum = group_result.cumsum()
        lag_result_cumsum = lag_result.cumsum()
        ##############################################################################
        summary = pd.DataFrame([], columns=['Summary'])
        year_factor_return = result['LSFactorReturn'].mean(
        ) * tradedays_yearly / backtest_period_days
        year_factor_std = result['LSFactorReturn'].std() * np.sqrt(
            tradedays_yearly / backtest_period_days)
        ic_mean = result['RankIC'].mean()
        ic_std = result['RankIC'].std()
        mean_antocorr = result['AutoRankCorr'].mean()

        ####################################################################################################
        summary.loc["YearFactorReturn", 'Summary'] = year_factor_return
        summary.loc["YearFactorStd", 'Summary'] = year_factor_std
        summary.loc["YearFactorIR",
                    'Summary'] = year_factor_return / year_factor_std
        summary.loc["ICMean", 'Summary'] = ic_mean
        summary.loc["ICstd", 'Summary'] = ic_std
        summary.loc["ICIR", 'Summary'] = ic_mean / ic_std
        summary.loc["AntoCorr", 'Summary'] = mean_antocorr
        ####################################################################################################

        sub_path = os.path.join(path, factor_name)
        if not os.path.exists(sub_path):
            os.makedirs(sub_path)
        result.to_csv(os.path.join(sub_path, factor_name + '_Result.csv'))
        group_result_cumsum.to_csv(
            os.path.join(sub_path, factor_name + '_GroupResult.csv'))
        lag_result_cumsum.to_csv(
            os.path.join(sub_path, factor_name + '_LagResult.csv'))
        summary.to_csv(os.path.join(sub_path, factor_name + '_Summary.csv'))