コード例 #1
0
ファイル: InFlowFreeMv.py プロジェクト: rlcjj/quant
def InFlowFreeMv(beg_date, end_date):
    """
    Factor: money inflow summed over the past 10 trading days divided by the
    free-float market value summed over the same window, times 1e8.

    Per the original author's note, "inflow" is the turnover traded while the
    price was rising during the day and "outflow" the turnover traded while
    the price was falling.

    For every trade date between ``beg_date`` and ``end_date`` that also
    exists in the inflow data, the window [date - 9 trade days, date] is
    used. A date is only calculated when the window holds at least
    ``int(0.8 * LongTerm)`` rows; otherwise the date gets an all-NaN column,
    which is dropped again before saving.

    Returns the factor frame (one column per date) after writing it with
    ``Stock().write_factor_h5``. When no date qualifies at all, an empty
    DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    LongTerm = 10
    factor_name = "InFlowFreeMv"
    min_rows = int(0.8 * LongTerm)

    # read data (transposed: rows are sliced by date below)
    #################################################################################
    inflow = Stock().get_factor_h5("Mf_Inflow", None, "primary_mfc").T
    free_mv = Stock().get_factor_h5("FreeMarketValue", None, "alpha_dfc").T

    # code set & date set
    #################################################################################
    [inflow, free_mv] = Stock().make_same_index_columns([inflow, free_mv])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(date_series) & set(inflow.index))

    # Collect one single-column frame per date and concatenate once at the
    # end instead of growing a DataFrame inside the loop.
    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(LongTerm - 1))
        # .loc replaces the removed .ix indexer (label-based slice).
        inflow_pre = inflow.loc[data_beg_date:current_date, :]
        free_mv_pre = free_mv.loc[data_beg_date:current_date, :]

        if len(inflow_pre) >= min_rows:

            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))

            date_data = pd.concat([inflow_pre.sum(), free_mv_pre.sum()],
                                  axis=1)
            date_data.columns = ['inflow', 'free_mv']
            # Avoid dividing by a zero market value.
            date_data = date_data[date_data['free_mv'] != 0.0]
            ratio = date_data['inflow'] / date_data['free_mv'] * 100000000
            daily_frames.append(ratio.to_frame(current_date))
        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            daily_frames.append(
                pd.DataFrame(float('nan'),
                             index=free_mv.columns,
                             columns=[current_date]))

    if not daily_frames:
        # Nothing to calculate for this date range; the original code raised
        # an unbound-variable error here.
        return pd.DataFrame()

    res = pd.concat(daily_frames, axis=1)
    # Drop dates whose column is entirely NaN.
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #2
0
def _to_cn_code(code):
    """Return the first six characters of ``code`` followed by '-CN'."""
    return code[0:6] + '-CN'


def _read_pool_status(pool_file):
    """Read a stock-pool Excel file, index it by '-CN' code and add a
    ``Status`` column set to 1.0 for every row."""
    data = pd.read_excel(pool_file, index_col=[0])
    data.index = data['证券代码'].map(stock_code_add_postfix)
    data.index = data.index.map(_to_cn_code)
    data['Status'] = 1.0
    return data


def _stock_holdings(securities, name_column, name):
    """Return the code/holding columns of the stock rows ('证券类别' ==
    '股票') whose ``name_column`` equals ``name``, as an independent copy."""
    selected = securities[securities[name_column] == name]
    selected = selected[selected['证券类别'] == '股票']
    return selected[['证券代码', '持仓']].copy()


def holding_data_yangchao(today, project_path, out_path):
    """
    Export the holding-data files for portfolio manager 'yangchao'.

    All files are written to ``<out_path>/yangchao/<today>/holding_data``;
    the directory (including parents) is created when missing. Most inputs
    are read for the trade date preceding ``today``.

    Args:
        today: reference date; converted with ``Date().change_to_str``.
        project_path: prefix that is concatenated directly with
            'Manage_Fund_Name.xlsx' (so it must end with a path separator).
        out_path: root directory of the output tree.

    Files produced: one CSV per managed fund and per group, copies of the
    raw stock-pool workbooks, English-named pool status CSVs, recent-IPO and
    suspended lists, benchmark weights with a cash line, English holding
    CSVs and monitor CSVs.
    """

    # input parameters
    ##################################################################################
    person_name = 'yangchao'
    before_trade_data = Date().get_trade_date_offset(today, -1)
    today = Date().change_to_str(today)
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")

    # fund list
    ##################################################################################
    # NOTE(review): `encoding` is not a documented read_excel argument in
    # recent pandas releases — kept as in the original, confirm before upgrading.
    fund = pd.read_excel(project_path + 'Manage_Fund_Name.xlsx',
                         encoding='gbk')
    # .loc replaces the removed .ix indexer.
    fund_list = list(fund.loc[:, person_name].dropna().values)

    # securities held by each fund
    ##################################################################################
    # Loaded once and reused by every section that needs the group securities
    # of the previous trade date.
    group_security = MfcData().get_group_security(before_trade_data)
    fund_asset = group_security[[
        '日期', '组合名称', '基金名称', '证券代码', '证券名称', '持仓', '市值比净值(%)', '盈亏率(%)',
        '证券类别', '当日买金额', '当日卖金额', '资产单元名称', '持仓多空标志'
    ]]

    # makedirs also creates the person/date parent directories; the original
    # os.mkdir failed when they did not exist yet.
    if not os.path.exists(out_sub_path):
        os.makedirs(out_sub_path)

    for fund_name in fund_list:

        fund_asset_fund = fund_asset[fund_asset['基金名称'] == fund_name]
        out_file = os.path.join(out_sub_path, fund_name + '.csv')
        fund_asset_fund.to_csv(out_file, index=None)

    # absolute-return group assets
    ##################################################################################
    group_name = 'yangchao_group'
    fund_list = list(fund.loc[:, group_name].dropna().values)

    for fund_name in fund_list:

        one_fund = fund_asset[fund_asset['组合名称'] == fund_name]
        # This group is exported under a different file name.
        if fund_name == '绝对收益期货组合':
            fund_name = "绝对收益股指期货组合"
        out_file = os.path.join(out_sub_path, fund_name + '.csv')
        one_fund.to_csv(out_file)

    # stock pools (raw workbook copies)
    ##################################################################################
    pool_path = Parameter().get_load_out_file("Mfc_Data")
    pool_list = [
        "公司超五库.xls", "公司股票库.xls", "公司关联库.xls", "公司禁止库.xls", "公司限制库.xls",
        "绝对收益禁止库.xls", "绝对收益投资库.xls", "量化限制库.xls"
    ]

    # NOTE(review): this section reads from the `today` folder while the
    # sections below read from the previous trade date — confirm intended.
    for pool_file in pool_list:

        src_file = os.path.join(pool_path, 'raw_file', today, pool_file)
        out_file = os.path.join(out_sub_path, pool_file)
        try:
            shutil.copyfile(src_file, out_file)
        except (IOError, OSError, shutil.Error):
            # Best effort: leave an empty workbook when the copy fails.
            pd.DataFrame().to_excel(out_file)

    # stock pools (English file names)
    ##################################################################################
    pool_list = {
        "公司禁止库.xls": "Company Forbidden Pool.csv",
        "公司关联库.xls": "Company Related Pool.csv",
        "公司限制库.xls": "Company Limited Pool.csv",
        "公司股票库.xls": "Company Investment Pool.csv",
        "绝对收益禁止库.xls": "ABS Fund Forbidden Pool.csv",
        "绝对收益投资库.xls": "ABS Fund Investment Pool.csv",
        "量化限制库.xls": "Quantitative Limited Pool.csv"
    }

    for src_name, out_name in pool_list.items():

        src_file = os.path.join(pool_path, 'raw_file', before_trade_data,
                                src_name)
        data = _read_pool_status(src_file)
        data.to_csv(os.path.join(out_sub_path, out_name),
                    header=None,
                    columns=['Status'])

    # Company Investment Pool.csv = company stock pool + company ">5%" pool
    # (overwrites the file written by the loop above)
    ##################################################################################
    stock_pool = _read_pool_status(
        os.path.join(pool_path, 'raw_file', before_trade_data, "公司股票库.xls"))
    stock_5_pool = _read_pool_status(
        os.path.join(pool_path, 'raw_file', before_trade_data, "公司超五库.xls"))

    out_file = os.path.join(out_sub_path, "Company Investment Pool.csv")
    res = pd.concat([stock_5_pool['Status'], stock_pool['Status']], axis=0)
    res.to_csv(out_file, header=None)

    # Recent IPO Stock.csv
    ##################################################################################
    ipo_date_pd = Stock().get_ipo_date()
    beg_date = (datetime.strptime(today, '%Y%m%d') -
                timedelta(days=365)).strftime("%Y%m%d")
    # .copy() so the assignment below does not write into a filtered view.
    ipo_date_pd = ipo_date_pd[ipo_date_pd['IPO_DATE'] > beg_date].copy()
    ipo_date_pd.loc[:, 'IPO_DATE'] = 1.0
    ipo_date_pd.index = ipo_date_pd.index.map(_to_cn_code)

    filename = 'Recent IPO Stock.csv'
    print('loading ', filename, ' ......')
    ipo_date_pd.to_csv(os.path.join(out_sub_path, filename),
                       header=None,
                       columns=['IPO_DATE'])

    # Suspended List.csv
    ##################################################################################

    status_data = Stock().get_trade_status_date(today)
    ipo_date_pd = Stock().get_ipo_date()
    data = pd.concat([status_data, ipo_date_pd], axis=1)
    data = data.dropna()
    data = data[data['DELIST_DATE'] >= today].copy()
    data['Trade_Status'] = 1.0
    data.index = data.index.map(_to_cn_code)

    filename = 'Suspended List.csv'
    print('loading ', filename, ' ......')
    data.to_csv(os.path.join(out_sub_path, filename),
                header=None,
                columns=['Trade_Status'])

    # Benchmark.csv: index weights scaled by 94.5 plus a 5.5 cash line
    ##################################################################################
    benchmark_dict = {
        "000905.SH": "CSI500 Benchmark.csv",
        "000300.SH": "CSI300 Benchmark.csv",
        "000016.SH": "CSI50 Benchmark.csv"
    }

    for index_code, out_name in benchmark_dict.items():

        data = Index().get_weight(index_code, before_trade_data)
        data.index = data.index.map(_to_cn_code)
        data['WEIGHT'] *= 94.5
        result = pd.DataFrame([5.5], index=["CSH_CNY"], columns=['WEIGHT'])
        result = pd.concat([result, data], axis=0)

        result.to_csv(os.path.join(out_sub_path, out_name),
                      header=None,
                      columns=['WEIGHT'])

    # fund holdings (English file names)
    ##################################################################################

    en_holding_dict = {
        "泰达宏利量化增强": "Quantitative Enhencement portfolio.csv",
        "泰达宏利业绩驱动量化": "Quantitative Earning Drive.csv",
        "泰达新思路": "New Thinking Portfolio.csv",
        "泰达宏利集利债券": "High Dividend Bond Equity.csv",
        "泰达宏利沪深300": "CSI300 Portfolio.csv",
        "泰达中证500指数分级": "CSI500 Portfolio.csv"
    }

    fund_security = MfcData().get_fund_security(before_trade_data)

    # Fund cash balances do not depend on the loop variable: load once.
    asset = MfcData().get_fund_asset(before_trade_data)
    asset.index = asset['基金名称']
    asset = asset[~asset.index.duplicated()]

    for name, out_name in en_holding_dict.items():

        fund_sec_one = _stock_holdings(fund_security, '基金名称', name)
        fund_sec_one.index = fund_sec_one['证券代码'].map(stock_code_add_postfix)
        fund_sec_one.index = fund_sec_one.index.map(_to_cn_code)
        print(fund_sec_one)
        # Every file except the bond fund gets an extra cash row.
        if out_name != "High Dividend Bond Equity.csv":
            fund_sec_one = fund_sec_one[~fund_sec_one.index.duplicated()].copy()
            fund_sec_one.loc['CSH_CNY', "持仓"] = asset.loc[name, "当前现金余额"]
        fund_sec_one['持仓'] = fund_sec_one['持仓'].round(0)
        fund_sec_one.to_csv(os.path.join(out_sub_path, out_name),
                            header=None,
                            columns=['持仓'])

    # absolute-return group holdings (English file names)
    ##################################################################################

    en_holding_dict = {
        "绝对收益50对冲股票组合": "Absolute Return Strategy CSI50 Portfolio.csv",
        "绝对收益300对冲股票组合": "Absolute Return Strategy CSI300 Portfolio.csv",
        "绝对收益500对冲股票组合": "Absolute Return Strategy CSI500 Portfolio.csv",
    }

    for name, out_name in en_holding_dict.items():

        fund_sec_one = _stock_holdings(group_security, '组合名称', name)
        fund_sec_one.index = fund_sec_one['证券代码'].map(stock_code_add_postfix)
        fund_sec_one.index = fund_sec_one.index.map(_to_cn_code)
        fund_sec_one.to_csv(os.path.join(out_sub_path, out_name),
                            header=None,
                            columns=['持仓'])

    # China Market Index.csv
    ##################################################################################
    data = Index().get_weight("China_Index_Benchmark", before_trade_data)
    out_name = "China Market Index.csv"
    data.index = data.index.map(_to_cn_code)
    data.to_csv(os.path.join(out_sub_path, out_name),
                header=None,
                columns=['WEIGHT'])

    # Monitor: fund securities
    ##################################################################################
    en_holding_dict = {"泰达中证500指数分级": "CSI500 Monitor.csv"}

    for name, out_name in en_holding_dict.items():

        fund_sec_one = _stock_holdings(fund_security, '基金名称', name)
        fund_sec_one.columns = ['STOCK_CODE', 'HOLDING']
        fund_sec_one.index = fund_sec_one['STOCK_CODE'].map(
            stock_code_add_postfix)
        fund_sec_one.to_csv(os.path.join(out_sub_path, out_name),
                            columns=['HOLDING'])

    # Monitor: absolute-return group securities
    ##################################################################################
    en_holding_dict = {
        "绝对收益50对冲股票组合": "Absolute Trading Monitor CSI50.csv",
        "绝对收益300对冲股票组合": "Absolute Trading Monitor CSI300.csv",
        "绝对收益500对冲股票组合": "Absolute Trading Monitor CSI500.csv",
        "绝对收益期货组合": "Absolute Monitor Option.csv",
    }

    for name, out_name in en_holding_dict.items():

        # No security-class filter here: all rows of the group are exported.
        fund_sec_one = group_security[group_security['组合名称'] == name]
        fund_sec_one = fund_sec_one[['证券代码', '持仓']].copy()
        fund_sec_one.columns = ['STOCK_CODE', 'HOLDING']
        if name != "绝对收益期货组合":
            fund_sec_one.index = fund_sec_one['STOCK_CODE'].map(
                stock_code_add_postfix)
        else:
            # Codes of the futures group are used without a postfix.
            fund_sec_one.index = fund_sec_one['STOCK_CODE']
        fund_sec_one.to_csv(os.path.join(out_sub_path, out_name),
                            columns=['HOLDING'])
コード例 #3
0
ファイル: AdvanceReceiptsEquity.py プロジェクト: rlcjj/quant
def AdvanceReceiptsEquity(beg_date, end_date):
    """
    Factor: advance receipts / shareholders' equity.

    Per the original author's note, both items come from the same report
    period. If either item is negative, the ratio is forced to be negative.

    Quarterly data is expanded to daily data with
    ``change_quarter_to_daily_with_report_date``. For every trade date
    between ``beg_date`` and ``end_date``, rows with a missing value or a
    zero equity are dropped before dividing. Dates that are not present in
    the daily data get an all-NaN column, which is dropped before saving.

    Returns the factor frame (one column per date) after writing it with
    ``Stock().write_factor_h5``. When the trade-date series is empty, an
    empty DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    factor_name = 'AdvanceReceiptsEquity'

    # read data
    #################################################################################
    advance = Stock().get_factor_h5("AdvanceReceipts", None, "primary_mfc")
    equity = Stock().get_factor_h5("TotalShareHoldeRequity", None,
                                   "primary_mfc")

    advance = StockFactorOperate().change_quarter_to_daily_with_report_date(
        advance, beg_date, end_date)
    equity = StockFactorOperate().change_quarter_to_daily_with_report_date(
        equity, beg_date, end_date)

    # data processing
    #################################################################################
    [advance, equity] = Stock().make_same_index_columns([advance, equity])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    # One single-column frame per date, concatenated once after the loop.
    daily_frames = []

    for current_date in date_series:

        if current_date in advance.columns:
            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))

            data_date = pd.concat(
                [advance[current_date], equity[current_date]], axis=1)
            data_date.columns = ['advance_date', 'equity_date']
            data_date = data_date.dropna()
            data_date = data_date[data_date['equity_date'] != 0.0]

            ratio = data_date['advance_date'] / data_date['equity_date']

            # If either input is negative the ratio must be negative.
            any_negative = (data_date['advance_date'] <
                            0.0) | (data_date['equity_date'] < 0.0)
            ratio = ratio.where(~any_negative, -ratio.abs())
            daily_frames.append(ratio.to_frame(current_date))
        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            daily_frames.append(
                pd.DataFrame(float('nan'),
                             index=advance.index,
                             columns=[current_date]))

    if not daily_frames:
        # Empty date range; the original code raised an unbound-variable
        # error here.
        return pd.DataFrame()

    res = pd.concat(daily_frames, axis=1)
    # Drop dates whose column is entirely NaN.
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #4
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Calculate the factor exposure and save it.

        For each trade date the adjusted close prices and trade amounts of
        the window [date - 119 trade days, date] are used (dates are the
        columns of both frames). Per row the result is

            sum(sign(p - p_now) * amount * w1 * w2) / sum(amount * w1 * w2)

        where ``w1 = log(p_now / |p - p_now|)`` (set to 0 where it is
        infinite) and ``w2`` are linear weights 1..n over the window columns,
        normalised to sum to one.

        A date is only calculated when the window holds more than
        ``effective_term`` date columns that are not entirely NaN; otherwise
        the date gets an all-NaN column, which is dropped before saving.

        The result is passed to ``self.save_alpha_factor_exposure``; nothing
        is returned.
        """

        # param
        long_term = 120
        effective_term = 96

        # read data
        price = Stock().read_factor_h5("PriceCloseAdjust")
        trade_amount = Stock().read_factor_h5("TradeAmount")

        # data processing
        [trade_amount,
         price] = Stock().make_same_index_columns([trade_amount, price])

        # calculate data daily
        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(date_series) & set(price.columns))

        # One single-column frame per date, concatenated once after the loop.
        daily_frames = []

        for current_date in date_series:

            data_beg_date = Date().get_trade_date_offset(
                current_date, -(long_term - 1))
            price_before = price.loc[:, data_beg_date:current_date]
            price_before = price_before.dropna(axis=1, how='all')
            price_current = price.loc[:, current_date]
            trade_amount_before = trade_amount.loc[:,
                                                   data_beg_date:current_date]
            trade_amount_before = trade_amount_before.dropna(axis=1,
                                                             how='all')

            # Window length = number of date columns. The original compared
            # len(price_before), i.e. the number of rows, with effective_term,
            # so the guard did not depend on the amount of history at all.
            n_days = len(price_before.columns)
            n_rows = len(price_before)

            if n_days > effective_term:

                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                price_diff = price_before.sub(price_current, axis='index')
                w1 = np.log(
                    1 / price_diff.abs().mul(1 / price_current, axis='index'))

                # A price equal to the current price gives an infinite
                # weight; such observations get weight 0.
                w1[np.isinf(w1)] = 0.0

                steps = np.arange(1, n_days + 1)
                weight = steps / steps.sum()
                w2 = pd.DataFrame(np.tile(weight, (n_rows, 1)),
                                  index=price_before.index,
                                  columns=price_before.columns)

                total_power = trade_amount_before.mul(w1).mul(w2)
                resistance_power = np.sign(price_diff).mul(total_power)

                ratio = resistance_power.sum(axis=1) / total_power.sum(axis=1)
                ratio = ratio.to_frame(current_date)

            else:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                ratio = pd.DataFrame(float('nan'),
                                     columns=[current_date],
                                     index=trade_amount_before.index)

            daily_frames.append(ratio)

        res = pd.concat(daily_frames, axis=1) if daily_frames \
            else pd.DataFrame()
        # Drop dates whose column is entirely NaN.
        res = res.dropna(axis=1, how='all')
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
コード例 #5
0
def SPTTMDaily(beg_date, end_date):
    """
    Factor: total operating income (TTM) / total market value.

    Per the original author's note, TTM is not aligned to a common report
    period: the most recent report available is used. If either input is
    negative, the ratio is forced to be negative.

    Single-quarter income is converted to TTM and expanded to daily data by
    disclosure date. For every trade date between ``beg_date`` and
    ``end_date``, rows with a missing value or a zero market value are
    dropped before dividing. Dates that are not present in the daily income
    data get an all-NaN column, which is dropped before saving.

    Returns the factor frame (one column per date) after writing it with
    ``Stock().write_factor_h5``. When the trade-date series is empty, an
    empty DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    factor_name = "SPTTMDaily"

    # read data
    #################################################################################
    income = Stock().get_factor_h5("OperatingIncome", None, "primary_mfc")
    income = StockFactorOperate().change_single_quarter_to_ttm_quarter(income)
    report_data = Stock().get_factor_h5("OperatingIncomeDaily", "ReportDate",
                                        'primary_mfc')
    income = StockFactorOperate().change_quarter_to_daily_with_disclosure_date(
        income, report_data, beg_date, end_date)
    mv = Stock().get_factor_h5("TotalMarketValue", None, "alpha_dfc")

    # data processing
    #################################################################################
    [income, mv] = Stock().make_same_index_columns([income, mv])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    # One single-column frame per date, concatenated once after the loop.
    daily_frames = []

    for current_date in date_series:

        if current_date in income.columns:

            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))

            data_date = pd.concat([income[current_date], mv[current_date]],
                                  axis=1)
            data_date.columns = ['income', 'mv']

            data_date = data_date.dropna()
            data_date = data_date[data_date['mv'] != 0.0]

            ratio = data_date['income'] / data_date['mv']

            # If either input is negative the ratio must be negative.
            any_negative = (data_date['income'] < 0.0) | (data_date['mv'] <
                                                          0.0)
            ratio = ratio.where(~any_negative, -ratio.abs())
            daily_frames.append(ratio.to_frame(current_date))
        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            daily_frames.append(
                pd.DataFrame(float('nan'),
                             index=income.index,
                             columns=[current_date]))

    if not daily_frames:
        # Empty date range; the original code raised an unbound-variable
        # error here.
        return pd.DataFrame()

    res = pd.concat(daily_frames, axis=1)
    # Drop dates whose column is entirely NaN.
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #6
0
ファイル: VolumeLnMean120d.py プロジェクト: rlcjj/quant
def VolumeLnMean120d(beg_date, end_date):
    """
    Factor: weighted sum of -1 * log(trade amount) over the past 120 trading
    days, with linear weights 1..120 applied in row order of the window.

    The original author describes this as a weighted average whose weights
    decrease linearly with time; note that the weights are not normalised
    in the code.

    Missing trade amounts are treated as 0. A date is only calculated when
    its window holds exactly ``LongTerm`` rows; only columns with fewer than
    ``HalfTerm`` zero-amount days in the window are kept. Other dates get an
    all-NaN column, which is dropped before saving.

    Returns the factor frame (one column per date) after writing it with
    ``Stock().write_factor_h5``. When no date qualifies at all, an empty
    DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    LongTerm = 120
    HalfTerm = int(LongTerm / 2)
    factor_name = 'VolumeLnMean120d'

    # read data (transposed: rows are sliced by date below)
    #################################################################################
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc").T

    # code set & date set
    #################################################################################
    trade_amount = trade_amount.fillna(0.0)

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(trade_amount.index) & set(date_series))

    # The weight vector is the same for every date.
    weight = np.arange(1, LongTerm + 1)

    # One single-column frame per date, concatenated once after the loop.
    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(LongTerm - 1))
        # .loc replaces the removed .ix indexer (label-based slice).
        amount_before = trade_amount.loc[data_beg_date:current_date, :]

        if len(amount_before) == LongTerm:

            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))
            # Number of zero-amount days per column in the window.
            zero_number = (amount_before == 0.0).sum()
            code_filter_list = zero_number[zero_number < HalfTerm].index

            amount_before = amount_before.loc[:, code_filter_list]
            # Zero amounts become NaN instead of -log(0).
            # NOTE(review): np.dot propagates NaN, so any column with at
            # least one zero-amount day in the window ends up NaN even
            # though it passed the HalfTerm filter — confirm this is intended.
            amount_before_log = -np.log(
                amount_before.where(amount_before != 0))

            weight_amount_log_val = np.dot(amount_before_log.T.values, weight)
            weight_amount_log = pd.DataFrame(weight_amount_log_val,
                                             index=amount_before_log.columns,
                                             columns=[current_date])

        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            weight_amount_log = pd.DataFrame(float('nan'),
                                             columns=[current_date],
                                             index=trade_amount.columns)

        daily_frames.append(weight_amount_log)

    if not daily_frames:
        # Nothing to calculate for this date range; the original code raised
        # an unbound-variable error here.
        return pd.DataFrame()

    res = pd.concat(daily_frames, axis=1)
    # Drop dates whose column is entirely NaN.
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #7
0
ファイル: fund_rank.py プロジェクト: easy00000000/quant
    def rank_fund2(self,
                   fund_pct,
                   bench_pct,
                   fund_code,
                   rank_pool,
                   beg_date,
                   end_date,
                   new_fund_date=None,
                   excess=False):
        """
        Calculate the rank of one fund inside a fund pool.

        Three ranking modes:
        1. ``rank_pool == 'wind'``: take the rank directly from the Wind API.
        2. Any other pool with ``excess=False``: rank by the cumulative
           return computed from ``fund_pct``.
        3. Any other pool with ``excess=True``: rank by the cumulative
           return of ``fund_pct - bench_pct``.

        In modes 2 and 3 only funds of the pool flagged as non-feeder,
        non-HK, A-class and non-ETF whose ``setupdate`` is not later than
        ``new_fund_date`` (default: ``beg_date``) are ranked, and the full
        ranking table is written as CSV into ``self.data_path``.

        Returns:
            (val, pct): the rank as text and the rank percentage rounded to
            three decimals. In modes 2 and 3 ``val`` has the form
            "position/total", and both values are the string "None" when the
            fund cannot be looked up or the CSV cannot be written. In mode 1
            only ``pct`` falls back to "None".
        """

        if new_fund_date is None:
            new_fund_date = beg_date

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)
        new_fund_date = Date().change_to_str(new_fund_date)

        print(" 正在计算基金排名 %s 在基金池 %s 从 %s 到 %s " %
              (fund_code, rank_pool, beg_date, end_date))

        # rank source depends on the pool
        if rank_pool == 'wind':

            # Wind level-3 classification
            date_str = "startDate=%s;endDate=%s;fundType=3" % (beg_date,
                                                               end_date)
            data = w.wss(fund_code, "peer_fund_return_rank_per", date_str)
            val = str(data.Data[0][0])
            data = w.wss(fund_code, "peer_fund_return_rank_prop_per", date_str)

            try:
                pct = np.round(data.Data[0][0] / 100.0, 3)
            except Exception as e:
                print(e)
                print("wind返回基金排名百分比非数字")
                pct = "None"
            return val, pct

        # load and filter the fund pool
        pool = FundPool().get_fund_pool_all(date="20181231", name=rank_pool)
        bool_series = (pool['if_connect'] == '非联接基金') & (pool['if_hk']
                                                         == '非港股基金')
        bool_series &= (pool['if_a'] == 'A类基金')
        bool_series &= (pool['if_etf'] == '非ETF基金')
        pool = pool[bool_series]

        # The absolute and the excess ranking only differ in the return
        # series they start from; everything after that is shared.
        if excess:
            period_pct = fund_pct.sub(bench_pct)
        else:
            period_pct = fund_pct

        # cumulative return over the period, one row per fund
        period_pct = period_pct.loc[beg_date:end_date, pool.index]
        period_pct = period_pct.dropna(how='all')
        data = (period_pct / 100.0 + 1.0).cumprod() - 1.0
        data = pd.DataFrame(data.iloc[-1, :])
        data.columns = ['Pct']
        data = data[~data.index.duplicated()]
        data = data.dropna()

        data = pd.concat([data, pool], axis=1)
        data = data[data["setupdate"] <= new_fund_date]
        data = data.dropna(subset=['Pct'])
        data = data.sort_values(by='Pct', ascending=False)

        total = len(data)
        data['range'] = range(total)
        data["rank"] = data['range'].map(
            lambda x: str(x + 1) + "/" + str(total))
        data['rank_pct'] = data['range'].map(lambda x: (x + 1) / total)

        try:
            val = data.loc[fund_code, "rank"]
            pct = data.loc[fund_code, "rank_pct"]
            pct = np.round(pct, 3)
            out_file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date,
                                            end_date)
            out_file = os.path.join(self.data_path, out_file)
            data.to_csv(out_file)
        except Exception as e:
            print(e)
            val = "None"
            pct = "None"
        return val, pct
コード例 #8
0
ファイル: ROICTTM.py プロジェクト: rlcjj/quant
def ROICTTM(beg_date, end_date):
    """
    Compute the ROIC (TTM) factor for each trade date in a range.

    Factor definition: (operating income TTM - operating cost TTM) /
    total invested capital, with TTM values aligned on a common report period.

    The result is written to the "alpha_dfc" factor store under the name
    "ROICTTM" and is also returned.

    :param beg_date: first date of the range handed to the date helpers
    :param end_date: last date of the range handed to the date helpers
    :return: DataFrame of ratios with one column per trade date (dates whose
        column is entirely NaN are removed). When the range contains no trade
        dates an empty DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    factor_name = "ROICTTM"

    # read data
    #################################################################################
    cost = Stock().get_factor_h5("OperatingCost", None, "primary_mfc")
    income = Stock().get_factor_h5("OperatingIncome", None, "primary_mfc")
    investcapital = Stock().get_factor_h5("Investcapital", None, "primary_mfc")

    cost = StockFactorOperate().change_single_quarter_to_ttm_quarter(cost)
    income = StockFactorOperate().change_single_quarter_to_ttm_quarter(income)
    investcapital = StockFactorOperate().change_single_quarter_to_ttm_quarter(investcapital)

    # NOTE(review): presumably turns the four-quarter TTM sum of a balance-sheet
    # item back into an average level - confirm against the TTM helper.
    investcapital /= 4.0

    cost = StockFactorOperate().change_quarter_to_daily_with_report_date(cost, beg_date, end_date)
    income = StockFactorOperate().change_quarter_to_daily_with_report_date(income, beg_date, end_date)
    investcapital = StockFactorOperate().change_quarter_to_daily_with_report_date(investcapital, beg_date, end_date)

    # data processing
    #################################################################################
    [cost, income, investcapital] = Stock().make_same_index_columns([cost, income, investcapital])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    # One single-column frame per date; they are concatenated once after the
    # loop instead of re-concatenating the growing result on every iteration.
    daily_columns = []

    for current_date in date_series:

        if current_date in cost.columns:

            print('Calculating factor %s at date %s' % (factor_name, current_date))

            data_date = pd.concat([cost[current_date],
                                   income[current_date],
                                   investcapital[current_date]], axis=1)
            data_date.columns = ['cost', 'income', 'investcapital']

            # An industry adjustment (zeroing the cost of banks / non-bank
            # financials) was sketched here originally but never enabled.

            data_date['diff'] = data_date['income'] - data_date['cost']
            data_date = data_date.dropna()
            # Copy so the column assignments below act on an independent frame
            # rather than on a filtered view of the previous one.
            data_date = data_date[data_date['investcapital'] != 0.0].copy()
            data_date['ratio'] = data_date['diff'] / data_date['investcapital']

            # If either the numerator or the denominator is negative the ratio
            # is forced negative, so two negatives never yield a positive ratio.
            minus_index = (data_date['diff'] < 0.0) | (data_date['investcapital'] < 0.0)
            data_date.loc[minus_index, 'ratio'] = - data_date.loc[minus_index, 'ratio'].abs()
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            data_date = pd.DataFrame([], columns=["ratio"], index=cost.index)

        daily_columns.append(pd.DataFrame(data_date['ratio'].values,
                                          columns=[current_date],
                                          index=data_date.index))

    if not daily_columns:
        # No trade date in the requested range: previously this path failed
        # with an unbound `res`. Return an empty result and leave the stored
        # factor untouched.
        return pd.DataFrame()

    res = pd.concat(daily_columns, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #9
0
    def regress_fund(self, fund_code, beg_date, end_date):
        """
        Regress a fund's returns on its last-quarter holdings plus a bond index.

        For every weekly trade date in [beg_date, end_date] on which the fund
        has a return, a constrained least-squares problem is solved that
        explains the fund return with the returns of the stocks held at the
        last quarterly report plus the "885062.WI" series. The fitted weights
        and an adjusted R2 are collected per date and merged into
        ``<self.data_path_exposure>/<fund_code>.csv``.

        :param fund_code: column label of the fund in ``self.fund_pct``
        :param beg_date: first date of the range
        :param end_date: last date of the range
        :return: None; the result is written to the CSV file
        """

        period = "W"
        date_series = Date().get_trade_date_series(beg_date, end_date, period)

        fund_return = self.fund_pct[fund_code]
        fund_return = fund_return.dropna()
        # Keep only the weekly dates on which the fund actually has a return.
        date_series = list(set(date_series) & set(fund_return.index))
        date_series.sort()

        # Quarterly report holdings
        quarter_weight = Fund().get_fund_holding_quarter(fund_code)
        # NOTE(review): r2_series is not used again within this method.
        r2_series = pd.DataFrame([], index=date_series, columns=['r2'])

        for i_date in range(0, len(date_series)):

            # Determine the regression window.
            # Shortly after a quarterly report the window is short;
            # further away from the report the window is longer
            # (it never starts before the last quarter date).
            ed_date = date_series[i_date]
            ed_date = Date().get_trade_date_offset(ed_date, -0)
            quarter_date = Date().get_last_fund_quarter_date(ed_date)

            bg_date = Date().get_trade_date_offset(ed_date,
                                                   -(self.regression_len - 1))
            bg_date = max(bg_date, quarter_date)
            bg_date = Date().get_trade_date_offset(bg_date, -0)

            date_diff = Date().get_trade_date_diff(bg_date, ed_date)

            # Holdings of the previous report
            try:
                stock_weight = pd.DataFrame(quarter_weight[quarter_date])
                stock_weight = stock_weight.dropna()
                stock_weight.columns = ['Weight']

                # Return data: fund return first, then stock returns, then
                # the self.bold_pct columns.
                data = pd.concat([fund_return, self.stock_pct, self.bold_pct],
                                 axis=1)
                data['885062.WI'] = data['885062.WI'].fillna(0.0)
                regress_date_series = Date().get_trade_date_series(
                    bg_date, ed_date)
                data = data.loc[regress_date_series, :]
                # Drop columns with fewer than regression_min_len observations.
                data = data.T.dropna(thresh=self.regression_min_len).T
                # NOTE(review): fillna with a Series matches on column labels,
                # while data.mean(axis=1) is labelled by row - confirm that
                # this fills the gaps as intended.
                data = data.fillna(data.mean(axis=1))

                # Stock pool: held stocks that have return data, plus 885062.WI
                stock_pool = list(stock_weight.index)
                stock_pool = list(set(stock_pool) & set(data.columns[1:]))
                stock_pool.sort()
                stock_pool.append("885062.WI")

                # Scale the stock weights to the fund's stock ratio, give the
                # remainder to 885062.WI, restrict to the pool and renormalise
                # so the prior weights sum to one.
                stock_ratio = self.get_fund_stock_ratio(
                    fund_code, quarter_date)
                stock_weight['Weight'] /= stock_weight['Weight'].sum()
                stock_weight['Weight'] *= stock_ratio
                stock_weight.loc["885062.WI", "Weight"] = 100 - stock_ratio
                stock_weight /= 100.0
                stock_weight = stock_weight.loc[stock_pool, :]
                stock_weight['Weight'] /= stock_weight['Weight'].sum()

                print("## Cal Regress %s %s %s %s %s ##" %
                      (fund_code, quarter_date, bg_date, ed_date, len(data)))

                if (len(data) > self.regression_min_len) and (len(stock_pool) >
                                                              4):

                    # Fit the fund return with the pool returns by minimising
                    # the squared tracking error, subject to:
                    # weights sum to 1, no short positions, and limited
                    # turnover relative to the last reported weights.

                    y = data[fund_code].values / 100.0
                    x = data[stock_pool].values / 100.0
                    n = len(y)
                    k = x.shape[1]
                    weight_old = stock_weight.T.values[0]
                    # Allowed turnover grows with the length of the window.
                    turnover = date_diff * 0.8 / 100
                    print("TurnOver %s " % turnover)

                    # Optimisation
                    ##############################################################################
                    w = cvx.Variable(k)
                    sigma = y - x * w
                    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                        cvx.sum(w) == 1.0, w >= 0,
                        cvx.sum(cvx.abs(w - weight_old)) <= turnover
                    ])
                    prob.solve()

                    print('Solver Status : ', prob.status)
                    params_add = pd.DataFrame(w.value,
                                              columns=[ed_date],
                                              index=stock_pool)

                    # Regression R2 (degrees-of-freedom adjusted)
                    ##############################################################################
                    tss = np.sum((y - np.mean(y))**2) / n
                    y_res = y - np.dot(x, w.value)
                    rss = np.sum(y_res**2) / (n - k - 1)
                    r2 = 1 - rss / tss
                    params_add.loc["R2", ed_date] = r2

                    print(params_add.T)

                else:
                    # Not enough observations or too few stocks: empty column.
                    params_add = pd.DataFrame([],
                                              columns=[ed_date],
                                              index=stock_pool)
            except Exception as e:
                # Any failure for this date yields an empty column; the
                # exception itself is not reported.
                params_add = pd.DataFrame([], columns=[ed_date])

            if i_date == 0:
                params_new = params_add
            else:
                params_new = pd.concat([params_new, params_add], axis=1)

        # Merge the new rows with the stored data
        ####################################################################
        params_new = params_new.T
        out_file = os.path.join(self.data_path_exposure, fund_code + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
コード例 #10
0
ファイル: fund_exposure.py プロジェクト: rlcjj/quant
    def cal_fund_holder_exposure(self, fund, beg_date, end_date):
        """
        Compute a fund's Barra factor exposures from its reported holdings.

        For every date returned by ``get_normal_date_series`` with
        ``period='S'`` (semi-annual, per the original comment) the exposure to
        each STYLE / COUNTRY / INDUSTRY factor is the sum over held stocks of
        ``Weight * stock exposure`` divided by 100. Dates without factor
        exposures or holdings produce an all-NaN row. The rows are merged into
        ``Fund_Holder_Exposure_<fund>.csv``.

        :param fund: fund code
        :param beg_date: first date of the range
        :param end_date: last date of the range
        :return: None; the result is written to the CSV file. Nothing is
            written when the date range is empty.
        """

        # Computed once per half-year
        type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
        date_series = Date().get_normal_date_series(beg_date,
                                                    end_date,
                                                    period='S')

        # The factor names depend only on type_list, so look them up once
        # instead of on every iteration.
        barra_name = list(
            Barra().get_factor_name(type_list)['NAME_EN'].values)

        exposure_rows = []

        for date in date_series:

            report_date = Date().get_normal_date_month_end_day(date)
            trade_date = Date().get_trade_date_month_end_day(date)

            barra_exposure = Barra().get_factor_exposure_date(
                trade_date, type_list)
            fund_holding = FundHolder().get_fund_holding_report_date_fund(
                fund, report_date)
            print("########## Calculate Holder Exposure %s %s ##########" %
                  (fund, report_date))

            # Start from an all-NaN row; it is filled only when both inputs
            # exist and share at least one fully populated stock.
            exposure_add = pd.DataFrame([],
                                        columns=barra_name,
                                        index=[report_date])

            if (barra_exposure is not None) and (fund_holding is not None):
                data = pd.concat([fund_holding['Weight'], barra_exposure],
                                 axis=1)
                data = data.dropna()

                if len(data) > 0:
                    for factor_name in barra_name:
                        stock_exposure = data['Weight'] * data[factor_name]
                        # .loc replaces the removed DataFrame.ix accessor.
                        exposure_add.loc[report_date, factor_name] = \
                            stock_exposure.sum() / 100.0

            exposure_rows.append(exposure_add)

        if not exposure_rows:
            # Empty date series: previously failed on an unbound name.
            return

        exposure_new = pd.concat(exposure_rows, axis=0)

        # Merge the new rows with the stored data
        ####################################################################
        out_path = Parameter().get_read_file(self.holder_exposure_name)
        out_file = os.path.join(out_path,
                                'Fund_Holder_Exposure_' + fund + '.csv')

        if os.path.exists(out_file):
            exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            exposure_old.index = exposure_old.index.map(str)
            params = pandas_add_row(exposure_old, exposure_new)
        else:
            params = exposure_new
        params.to_csv(out_file)
コード例 #11
0
def weight_allstock_holding_date(report_date):
    """
    Build the asset-weighted all-holdings stock benchmark for a report date.

    Every fund in the "基金持仓基准基金池" pool whose reported stock weights sum
    to at least 60 contributes ``Weight * asset`` per holding, where ``asset``
    is the fund's "Total_Asset" divided by 1e8 (1.0 when it cannot be looked
    up). The per-stock totals are normalised to sum to one and written to
    ``weight_halfyear_all_<report_date>.csv``.

    :param report_date: report date, converted with ``Date().change_to_str``
    :return: None; the result is written to the CSV file. Nothing is written
        when no fund qualifies.
    """

    report_date = Date().change_to_str(report_date)
    data = Fund().get_fund_holding_stock_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]

    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")
    fund_code = sorted(set(pool))

    weight = Fund().get_fund_factor("Total_Asset", date_list=[report_date]).T
    weight = weight.dropna()

    qualified_holdings = []

    for fund in fund_code:

        data_fund = data[data['FundCode'] == fund]
        data_fund = data_fund.dropna(subset=['Weight'])
        data_fund = data_fund.sort_values(by=['Weight'], ascending=False)

        # Best-effort asset lookup: funds without a usable asset value are
        # deliberately weighted with 1.0 instead of being dropped.
        try:
            asset = weight.loc[fund, report_date]
            asset /= 100000000
        except Exception:
            asset = 1.0

        # Funds whose reported holdings cover less than 60 are ignored.
        if data_fund['Weight'].sum() < 60:
            continue

        fund_holding = data_fund.copy()
        fund_holding["Asset_Weight"] = fund_holding['Weight'] * asset
        qualified_holdings.append(fund_holding)

    if not qualified_holdings:
        # Empty pool or no fund above the threshold: there is nothing to
        # aggregate (this path used to fail on a missing name / column).
        print("No qualified fund holding at %s" % report_date)
        return

    data_fund_all = pd.concat(qualified_holdings, axis=0)

    # Normalised asset weight per stock; groupby returns the codes sorted.
    weight_sum = data_fund_all['Asset_Weight'].sum()
    stock_weight = data_fund_all.groupby('StockCode')['Asset_Weight'].sum()
    weight_code = (stock_weight / weight_sum).to_frame('Asset_Weight')

    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')
    weight_code.index.name = None

    out_path = os.path.join(Fund().data_path_holder, "fund_holding_benchmark")
    out_path = os.path.join(out_path, "weight_halfyear_all")
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path,
                            "weight_halfyear_all_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
コード例 #12
0
ファイル: fund_exposure.py プロジェクト: rlcjj/quant
    def cal_fund_regression_exposure(self,
                                     fund,
                                     beg_date,
                                     end_date,
                                     period="M"):
        """
        Estimate a fund's Barra style exposures by constrained regression.

        For every trade date of the given frequency the fund return is
        regressed on the STYLE and COUNTRY factor returns over the preceding
        ``self.regression_period`` trade days. The least-squares problem is
        solved as a QP with box constraints on the coefficients. The fitted
        factor coefficients (intercept dropped) are merged into
        ``Fund_Regression_Exposure_<fund>.csv``.

        :param fund: fund code
        :param beg_date: first date of the range
        :param end_date: last date of the range
        :param period: frequency passed to ``get_trade_date_series``
        :return: None; the result is written to the CSV file. Nothing is
            written when the date range is empty.
        """

        # Parameters
        ####################################################################
        up_style_exposure = 1.5
        up_position_exposure = 0.95
        low_position_floor = 0.75
        position_sub = 0.10

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Load data
        ####################################################################
        type_list = ['STYLE', 'COUNTRY']

        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        barra_return = Barra().get_factor_return(None, None, type_list)

        date_series = Date().get_trade_date_series(beg_date,
                                                   end_date,
                                                   period=period)
        fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None,
                                                   [fund])

        data = pd.concat([fund_return, barra_return], axis=1)
        data = data.dropna()
        print(" Fund Code Total Len %s " % len(data))
        factor_number = len(barra_name)

        # Regress for every date and collect the exposures
        ####################################################################
        exposure_rows = []

        for period_end_date in date_series:

            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -self.regression_period)

            period_date_series = Date().get_trade_date_series(
                period_beg_date, period_end_date)
            # Keep the rows whose date falls in the window. The removed
            # DataFrame.ix accessor tolerated window dates missing from
            # `data`; isin does the same without creating NaN rows.
            data_periods = data[data.index.isin(period_date_series)]
            data_periods = data_periods.dropna()

            quarter_date = Date().get_last_fund_quarter_date(period_end_date)
            stock_ratio = (FundFactor().get_fund_factor(
                "Stock_Ratio", [quarter_date], [fund]) / 100).values[0][0]
            print(
                "########## Calculate Regression Exposure %s %s %s %s %s %s ##########"
                % (fund, period_beg_date, period_end_date, quarter_date,
                   len(data_periods), stock_ratio))

            # Default: an all-NaN row for this date.
            params_add = pd.DataFrame([],
                                      columns=[period_end_date],
                                      index=barra_name).T

            if len(data_periods) > self.regression_period_min:

                # First column is the fund return, the rest are factor returns.
                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                x_add = sm.add_constant(x)

                # Lower position bound for THIS date only. The bound used to be
                # written back into the parameter and carried into later
                # dates, so it could only ratchet upward and the result for a
                # date depended on which earlier dates were processed.
                low_position_exposure = max(stock_ratio - position_sub,
                                            low_position_floor)
                print(low_position_exposure)

                # Least squares as a QP: minimise 1/2 b'Pb + Q'b.
                P = 2 * np.dot(x_add.T, x_add)
                Q = -2 * np.dot(x_add.T, y)

                # G stacks +I and -I so that G b <= h encodes an upper and a
                # (negated) lower bound for every coefficient.
                # NOTE(review): the bounds are applied by position to the QP
                # variables [const, factor_1 .. factor_k], so the position
                # bound lands on the last factor column - confirm that this is
                # the country factor.
                G_up = np.diag(np.ones(factor_number + 1))
                G_low = -np.diag(np.ones(factor_number + 1))
                G = np.vstack((G_up, G_low))
                h_up = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                  np.array([up_position_exposure])))
                h_low = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                   np.array([-low_position_exposure])))
                h = np.vstack((h_up, h_low))

                P = matrix(P)
                Q = matrix(Q)
                G = matrix(G)
                h = matrix(h)
                try:
                    result = sol.qp(P, Q, G, h)
                    params_add = pd.DataFrame(np.array(result['x'][1:]),
                                              columns=[period_end_date],
                                              index=barra_name).T
                except Exception:
                    # Solver failure: keep the all-NaN row for this date.
                    pass

            print(params_add)
            exposure_rows.append(params_add)

        if not exposure_rows:
            # Empty date series: previously failed on an unbound name.
            return

        params_new = pd.concat(exposure_rows, axis=0)

        # Merge the new rows with the stored data
        ####################################################################
        out_path = Parameter().get_read_file(self.regression_exposure_name)
        out_file = os.path.join(out_path,
                                'Fund_Regression_Exposure_' + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = pandas_add_row(params_old, params_new)
        else:
            params = params_new
        print(params)
        params.to_csv(out_file)
コード例 #13
0
def cal_factor_liquidity(beg_date, end_date):

    """
    Compute the Barra CNE5 LIQUIDITY risk factor.

    LIQUIDITY_STOM: log of the turnover sum over the last 21 trade days
    LIQUIDITY_STOQ: log of (turnover sum over the last 63 trade days / 3)
    LIQUIDITY_STOA: log of (turnover sum over the last 252 trade days / 12)
    LIQUIDITY = 0.35 * LIQUIDITY_STOM + 0.35 * LIQUIDITY_STOQ + 0.3 * LIQUIDITY_STOA
    LIQUIDITY is then regressed on the SIZE factor date by date and the
    residual is kept.

    Raw and normalised descriptors as well as the final factor are written
    to the 'barra_risk_dfc' factor store.

    :param beg_date: start date; only normalised with ``change_to_str`` here
    :param end_date: end date; only normalised with ``change_to_str`` here
    :return: None
    """

    # params
    ##################################################################################
    factor_name = "NORMAL_CNE5_LIQUIDITY"

    # Rolling window lengths in trade days: month, quarter, year.
    M = 21
    Q = 63
    A = 252

    # NOTE(review): the source contained a dangling `beg_date =` assignment
    # here; neither date is used below, so it is dropped - confirm whether a
    # date-range restriction was intended.
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # raw descriptors
    ##################################################################################
    turnover_daily = Stock().get_factor_h5("TurnOver_Daily", None, 'primary_mfc').T
    turnover_month = np.log(turnover_daily.rolling(window=M).sum())
    turnover_quarter = np.log(turnover_daily.rolling(window=Q).sum() / 3.0)
    turnover_yearly = np.log(turnover_daily.rolling(window=A).sum() / 12.0)

    # Drop dates without any value and transpose back so that all three
    # descriptors share the orientation used by the factor store.
    turnover_month = turnover_month.dropna(how='all').T
    turnover_quarter = turnover_quarter.dropna(how='all').T
    turnover_yearly = turnover_yearly.dropna(how='all').T

    Stock().write_factor_h5(turnover_month, "RAW_CNE5_LIQUIDITY_STOM", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_quarter, "RAW_CNE5_LIQUIDITY_STOQ", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_yearly, "RAW_CNE5_LIQUIDITY_STOA", 'barra_risk_dfc')

    # normalised descriptors
    ##################################################################################
    turnover_month = FactorPreProcess().remove_extreme_value_mad(turnover_month)
    turnover_month = FactorPreProcess().standardization_free_mv(turnover_month)
    turnover_quarter = FactorPreProcess().remove_extreme_value_mad(turnover_quarter)
    turnover_quarter = FactorPreProcess().standardization_free_mv(turnover_quarter)
    turnover_yearly = FactorPreProcess().remove_extreme_value_mad(turnover_yearly)
    turnover_yearly = FactorPreProcess().standardization_free_mv(turnover_yearly)

    Stock().write_factor_h5(turnover_month, "NORMAL_CNE5_LIQUIDITY_STOM", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_quarter, "NORMAL_CNE5_LIQUIDITY_STOQ", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_yearly, "NORMAL_CNE5_LIQUIDITY_STOA", 'barra_risk_dfc')

    # combine and orthogonalise against SIZE
    ##################################################################################
    turnover = 0.35 * turnover_month + 0.35 * turnover_quarter + 0.3 * turnover_yearly
    turnover = turnover.T.dropna(how='all').T

    size_data = Stock().get_factor_h5("NORMAL_CNE5_SIZE", None, 'barra_risk_dfc')
    [size_data, turnover] = FactorPreProcess().make_same_index_columns([size_data, turnover])

    turnover_res = pd.DataFrame([], index=turnover.index, columns=turnover.columns)

    for date in turnover.columns:

        print('Calculating Barra Risk factor %s at date %s' % (factor_name, date))
        regression_data = pd.concat([size_data[date], turnover[date]], axis=1)
        regression_data.columns = ['x', 'y']
        regression_data = regression_data.dropna()

        if regression_data.empty:
            # No stock has both values on this date: OLS cannot be fitted,
            # so the column is left as NaN.
            continue

        y = regression_data['y'].values
        x = regression_data['x'].values
        x_add = sm.add_constant(x)
        model = sm.OLS(y, x_add).fit()
        regression_data['res'] = regression_data['y'] - model.fittedvalues
        turnover_res[date] = regression_data['res']

    turnover_res = FactorPreProcess().remove_extreme_value_mad(turnover_res)
    turnover_res = FactorPreProcess().standardization_free_mv(turnover_res)
    Stock().write_factor_h5(turnover_res, factor_name, 'barra_risk_dfc')
コード例 #14
0
def weight_top10stock_holding_date(report_date):
    """
    Build the asset-weighted top-10-holdings stock benchmark for a report date.

    For every fund in the "基金持仓基准基金池" pool the ten largest holdings by
    weight are taken; the fund is used only when their weights sum to at
    least 30. Each holding contributes ``Weight * asset``, where ``asset`` is
    the fund asset divided by 1e8 (1.0 when it cannot be looked up). The
    per-stock totals are normalised to sum to one and written to
    ``weight_quarter_top10_<report_date>.csv``.

    :param report_date: report date, converted with ``Date().change_to_str``
    :return: None; the result is written to the CSV file. Nothing is written
        when no fund qualifies.
    """

    report_date = Date().change_to_str(report_date)
    data = Fund().get_fund_holding_report_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]

    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")
    fund_code = sorted(set(pool))

    weight = Fund().get_wind_fund_asset(report_date)

    qualified_holdings = []

    for fund in fund_code:

        data_fund = data[data['FundCode'] == fund]
        data_fund = data_fund.dropna(subset=['Weight'])
        data_fund = data_fund.sort_values(by=['Weight'], ascending=False)

        # Best-effort asset lookup: funds without a usable asset value are
        # deliberately weighted with 1.0 instead of being dropped.
        # `except Exception` (not a bare except) keeps Ctrl-C working.
        try:
            asset = weight.loc[fund, report_date]
            asset /= 100000000
        except Exception:
            asset = 1.0

        # Copy the slice so that adding a column does not write into a view
        # of the source frame.
        fund_top10 = data_fund.iloc[:10, :].copy()

        # Funds whose ten largest holdings cover less than 30 are ignored.
        if fund_top10['Weight'].sum() < 30:
            continue

        fund_top10["Asset_Weight"] = fund_top10['Weight'] * asset
        qualified_holdings.append(fund_top10)

    if not qualified_holdings:
        # Empty pool or no fund above the threshold: there is nothing to
        # aggregate (this path used to fail on a missing name / column).
        print("No qualified fund holding at %s" % report_date)
        return

    data_fund_top10 = pd.concat(qualified_holdings, axis=0)

    # Normalised asset weight per stock; groupby returns the codes sorted.
    weight_sum = data_fund_top10['Asset_Weight'].sum()
    stock_weight = data_fund_top10.groupby('StockCode')['Asset_Weight'].sum()
    weight_code = (stock_weight / weight_sum).to_frame('Asset_Weight')

    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')
    weight_code.index.name = None

    out_path = Parameter().get_read_file("Fund_Stock_Holding_BenchMark")
    out_path = os.path.join(out_path, "weight_quarter_top10")
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path,
                            "weight_quarter_top10_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
コード例 #15
0
def IlliquidityBias(beg_date, end_date):
    """
    Compute the illiquidity-bias factor for each trade date in a range.

    Illiquidity is |percentage change| / trade amount (scaled by 1e8).
    The factor is the mean illiquidity over the most recent short window
    divided by the mean over the last 40 trade days.

    The result is written to the "alpha_dfc" factor store under the name
    "IlliquidityBias" and is also returned.

    :param beg_date: first date of the range
    :param end_date: last date of the range
    :return: DataFrame with one column per trade date (all-NaN dates
        removed). When the range contains no usable trade dates an empty
        DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    LongTerm = 40
    ShortTerm = 10
    HalfTerm = LongTerm / 2
    factor_name = "IlliquidityBias"

    # read data
    #################################################################################
    pct = Stock().get_factor_h5("Pct_chg", None, "primary_mfc").T
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc").T

    # data processing
    #################################################################################
    [pct, trade_amount] = Stock().make_same_index_columns([pct, trade_amount])
    trade_amount = trade_amount.fillna(0.0)

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(date_series) & set(pct.index))

    # One single-column frame per date, concatenated once after the loop.
    daily_columns = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(LongTerm - 1))
        # Label-based slices use .loc (DataFrame.ix has been removed).
        trade_amount_before = trade_amount.loc[data_beg_date:current_date, :]

        if len(trade_amount_before) > HalfTerm:
            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))

            # Keep stocks with fewer than ShortTerm zero-amount days.
            zero_number = (trade_amount_before == 0.0).sum()
            code_filter_list = zero_number[zero_number < ShortTerm].index

            amount_before = trade_amount_before.loc[:, code_filter_list]
            pct_before = pct.loc[data_beg_date:current_date, code_filter_list]
            illiq = pct_before.abs().div(amount_before,
                                         axis='index') * 100000000
            # Discards extreme values, including the inf produced by
            # zero-amount days.
            illiq[illiq > 100.0] = np.nan

            # NOTE(review): this positional slice keeps the last
            # ShortTerm + 1 rows (11), while the factor description speaks
            # of a 10-day mean - kept as is, confirm the intended window.
            illiq_bias = illiq.iloc[-1 - ShortTerm:, :].mean() / illiq.mean()
            price_mean = pd.DataFrame(illiq_bias.values,
                                      columns=[current_date],
                                      index=illiq_bias.index)
        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            price_mean = pd.DataFrame([],
                                      columns=[current_date],
                                      index=trade_amount_before.columns)

        daily_columns.append(price_mean)

    if not daily_columns:
        # No trade date in the requested range: previously this path failed
        # with an unbound `res`. Return an empty result and leave the stored
        # factor untouched.
        return pd.DataFrame()

    res = pd.concat(daily_columns, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #16
0
ファイル: mail_exposure.py プロジェクト: easy00000000/quant
        '*****@*****.**', '*****@*****.**',
        '*****@*****.**'
    ]
    acc_mail_name = []
    subject_header = "指数基金风格暴露周报%s_自动发送" % last_date

    email = EmailSender()
    exposure_file = os.path.join(path, xlsx_name)
    email.attach_file(exposure_file)
    email.attach_picture_inside_body("最近交易日风格暴露" + last_date,
                                     os.path.join(path, pic_name + '.png'))
    email.attach_picture_inside_body(
        "最近半年报风格暴露" + report_date_halfyear,
        os.path.join(path, pic_name_halfyear + '.png'))
    email.send_mail_mfcteda(sender_mail_name, receivers_mail_name,
                            acc_mail_name, subject_header)
    ################################################################################


if __name__ == '__main__':

    # Script entry point: work out the reference dates from today's date and
    # send the weekly exposure mail.
    # (Optional refresh of the trade calendar, disabled in the original:
    #  Date().load_trade_date_series_all())
    path = 'E:\\Data\\fund_data\\fund_index_exposure_weekly\\'

    today = format(datetime.today(), "%Y%m%d")

    # Previous trade date and the latest half-year report date.
    last_date = Date().get_trade_date_offset(today, -1)
    report_date_halfyear = Date().get_last_fund_halfyear_date(today)

    status_line = " 最近半年报是 %s 最近一个交易日为 %s " % (report_date_halfyear,
                                               last_date)
    print(status_line)

    mail_exposure(path, last_date, report_date_halfyear)
コード例 #17
0
ファイル: Resistance.py プロジェクト: rlcjj/quant
def Resistance(beg_date, end_date):
    """
    Compute the "Resistance" factor (upward price resistance, after a Guotai
    Junan research note) for every trade date between beg_date and end_date.

    Definition, per row of the price table and per date:

        resistance_ratio = resistance_num / power_num
        resistance_num   = sum(sign(p_i - p) * V_i * w1_i * w2_i)
        power_num        = sum(V_i * w1_i * w2_i)

    where p is the price on the current date, p_i and V_i are the price and
    the traded amount on the i-th day of the look-back window, and

        w1_i = ln(p / abs(p_i - p))   price-distance weight; days whose price
                                      equals p would give +inf and are set to 0
        w2_i = i / (1 + 2 + ... + N)  linear time weight, i = 1 for the first
                                      column of the window and N for the last

    The original note describes the ratio as lying between -1 (all resistance
    is downward) and 1 (all resistance is upward), and gives
    w2_i = ln(1 + i) / ln(1 + N); the code has always used the linear weights
    above and that is kept unchanged.

    :param beg_date: start of the date range passed to
        Date().get_trade_date_series
    :param end_date: end of that date range
    :return: DataFrame with one column per date that produced at least one
        value, indexed like the price table (presumably stock codes). The same
        frame is written with Stock().write_factor_h5 under "alpha_dfc". When
        the range contains no usable date an empty DataFrame is returned and
        nothing is written.
    """

    # param
    #################################################################################
    LongTerm = 120      # look-back window, in trade dates
    MinimumSize = 96    # the window must hold more than this many dates
    factor_name = "Resistance"

    # read data
    #################################################################################
    price = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc")

    # data processing
    #################################################################################
    [trade_amount,
     price] = Stock().make_same_index_columns([trade_amount, price])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(date_series) & set(price.columns))

    if not date_series:
        # Previously this case crashed on an unbound `res`.
        print('Calculating factor %s has no date between %s and %s' %
              (factor_name, beg_date, end_date))
        return pd.DataFrame()

    # Collect one single-column frame per date and concatenate once at the
    # end instead of growing the result pairwise inside the loop.
    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(LongTerm - 1))
        # Dates are the columns here; drop dates without any observation.
        price_before = price.loc[:, data_beg_date:current_date]
        price_before = price_before.dropna(axis=1, how='all')
        pct_current = price.loc[:, current_date]
        trade_amount_before = trade_amount.loc[:, data_beg_date:current_date]
        trade_amount_before = trade_amount_before.dropna(axis=1, how='all')

        # Number of dates in the window. The size check used to be done on
        # len(price_before), which is the number of rows (not dates) and
        # therefore never rejected a short window.
        N = len(price_before.columns)

        if N > MinimumSize:
            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))
            price_sub_abs = price_before.sub(pct_current, axis='index').abs()
            W1 = np.log(1 / price_sub_abs.mul(1 / pct_current, axis='index'))

            # A past price equal to the current price gives ln(inf); such days
            # carry no price-distance weight.
            W1[np.isinf(W1)] = 0.0

            # Linear time weights 1..N normalised to sum to one, one per
            # window column.
            steps = np.arange(1, 1 + N)
            W2 = pd.Series(steps / steps.sum(), index=price_before.columns)

            TotalPower = trade_amount_before.mul(W1).mul(W2, axis='columns')
            Sign = np.sign(price_before.sub(pct_current, axis='index'))
            ResistancePower = Sign.mul(TotalPower)

            ratio = ResistancePower.sum(axis=1) / TotalPower.sum(axis=1)
            ratio = ratio.to_frame(name=current_date)

        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            # All-NaN placeholder; removed again by the dropna below.
            ratio = pd.DataFrame([],
                                 columns=[current_date],
                                 index=trade_amount_before.index)

        daily_frames.append(ratio)

    res = pd.concat(daily_frames, axis=1)
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #18
0
ファイル: VolumeUpRatio.py プロジェクト: rlcjj/quant
def VolumeUpRatio(beg_date, end_date):

    """
    Compute the "VolumeUpRatio" factor for every trade date between beg_date
    and end_date.

    For each date the current close is the lower bound and close * 1.1 the
    upper bound. The factor is the traded amount of the window days (last 120
    trade dates) whose close lies strictly between the two bounds, divided by
    the total traded amount of the window, multiplied by -1.

    Notes:
      * missing traded amounts are filled with 0;
      * a code is kept only when fewer than 60 of its window days have a zero
        traded amount; the other codes get no value for that date;
      * codes whose total traded amount in the window is 0 are dropped.

    :param beg_date: start of the date range passed to
        Date().get_trade_date_series
    :param end_date: end of that date range
    :return: DataFrame with one column per date that produced at least one
        value, indexed by the columns of the transposed input tables
        (presumably stock codes). The same frame is written with
        Stock().write_factor_h5 under "alpha_dfc". When the range contains no
        usable date an empty DataFrame is returned and nothing is written.
    """

    # param
    #################################################################################
    LongTerm = 120
    HalfTerm = int(LongTerm / 2)
    PctRange = 0.1
    factor_name = "VolumeUpRatio"

    # read data
    #################################################################################
    close = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc").T
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc").T

    # data processing
    #################################################################################
    [close, trade_amount] = Stock().make_same_index_columns([close, trade_amount])
    trade_amount = trade_amount.fillna(0.0)

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(trade_amount.index) & set(date_series))

    if not date_series:
        # Previously this case crashed on an unbound `res`.
        print('Calculating factor %s has no date between %s and %s' %
              (factor_name, beg_date, end_date))
        return pd.DataFrame()

    # One single-column frame per date, concatenated once after the loop.
    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm-1))
        amount_before = trade_amount.loc[data_beg_date:current_date, :]

        if len(amount_before) >= int(0.8*LongTerm):

            print('Calculating factor %s at date %s' % (factor_name, current_date))
            # Number of zero-amount days per code inside the window.
            zero_number = (amount_before == 0.0).sum()
            code_filter_list = zero_number[zero_number < HalfTerm].index

            close_low_limit = close.loc[current_date, code_filter_list]
            close_up_limit = close_low_limit * (1 + PctRange)

            close_before = close.loc[data_beg_date:current_date, code_filter_list]
            price_center = (close_before > close_low_limit) & (close_before < close_up_limit)

            # Restrict BOTH sums to the codes that passed the zero-amount
            # filter. Masking the unfiltered table with `price_center` only
            # blanked the numerator of the rejected codes, which then received
            # a ratio of -0.0 instead of being left out.
            amount_kept = amount_before.loc[:, code_filter_list]
            trade_amount_filter_sum = amount_kept[price_center].sum()
            trade_amount_sum = amount_kept.sum()

            trade_amount_sum = pd.concat([trade_amount_filter_sum, trade_amount_sum], axis=1)
            trade_amount_sum.columns = ['filter_sum', 'sum']
            trade_amount_sum = trade_amount_sum[trade_amount_sum['sum'] != 0.0]
            ratio = - trade_amount_sum['filter_sum'] / trade_amount_sum['sum']
            ratio = ratio.to_frame(name=current_date)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            # All-NaN placeholder; removed again by the dropna below.
            ratio = pd.DataFrame([], columns=[current_date], index=amount_before.columns)

        daily_frames.append(ratio)

    res = pd.concat(daily_frames, axis=1)
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #19
0
    def risk_factor_performance(self,
                                factor_name,
                                stock_pool_name="AllChinaStockFilter",
                                beg_date=None,
                                end_date=None,
                                period='M'):

        """
        Evaluate a single risk factor: annualised factor return and
        volatility, rank IC and ICIR, share of periods with |T| > 2, exposure
        auto-correlation and average adjusted R2. The aim is to find risk
        factors that have pricing power.

        For every pair of consecutive dates the next-period price change is
        regressed (OLS with intercept) on the current exposure, restricted to
        the stock pool. The per-period results and a summary table are written
        to 'Summary_<factor_name>.xlsx' under
        self.factor_performance_path/<stock_pool_name>.

        :param factor_name: name handed to self.get_risk_factor_exposure
        :param stock_pool_name: pool name handed to
            Stock().get_invest_stock_pool
        :param beg_date: first date; defaults to the first exposure column
        :param end_date: last date; defaults to the last exposure column
        :param period: sampling period handed to the Date helpers
        :return: None; when no period can be evaluated only a message is
            printed and no file is written.
        """

        exposure = self.get_risk_factor_exposure(factor_name)
        price = Stock().read_factor_h5("Price_Unadjust")
        num = Date().get_period_number_for_year(period)

        if beg_date is None:
            beg_date = exposure.columns[0]
        if end_date is None:
            end_date = exposure.columns[-1]

        date_series = Date().get_trade_date_series(beg_date, end_date, period)
        date_series = sorted(set(date_series) & set(exposure.columns) & set(price.columns))

        # Create every result column up front. They used to appear only once a
        # regression had succeeded, so a run without any usable period failed
        # with KeyError on 'T值' / 'IC' before reaching the "is Null" message.
        result_columns = ['因子收益率', 'IC', 'T值', '自相关系数', 'R2']
        factor_return = pd.DataFrame(index=date_series, columns=result_columns, dtype=float)

        # Walk over consecutive (buy, sell) date pairs.
        for cur_date, sell_date in zip(date_series[:-1], date_series[1:]):

            buy_date = cur_date
            stock_list = Stock().get_invest_stock_pool(stock_pool_name, cur_date)
            stock_pct = price[sell_date] / price[buy_date] - 1.0
            exposure_date = exposure[cur_date]
            exposure_next = exposure[sell_date]

            # Correlation between this period's and next period's exposure.
            data = pd.concat([exposure_date, exposure_next], axis=1)
            data = data.dropna()
            stock_list_finally = sorted(set(stock_list) & set(data.index))
            data = data.loc[stock_list_finally, :]
            auto_corr = data.corr().iloc[0, 1]

            # Cross-section of exposure (x) against next-period return (y).
            data = pd.concat([exposure_date, stock_pct], axis=1)
            stock_list_finally = sorted(set(stock_list) & set(data.index))
            data = data.loc[stock_list_finally, :]
            data.columns = ['x', 'y']
            data = data.dropna()

            if len(data) > 0:

                print("Risk Factor  %s %s %s" % (factor_name, stock_pool_name, cur_date))
                y = data['y'].values
                x = data['x'].values
                x_add = sm.add_constant(x)
                model = sm.OLS(y, x_add).fit()

                # Index 1 is the slope; index 0 is the intercept.
                factor_return_date = model.params[1]
                rank_corr = data.corr(method="spearman").iloc[0, 1]
                t_value = model.tvalues[1]
                r2 = model.rsquared_adj

                factor_return.loc[cur_date, '因子收益率'] = factor_return_date
                factor_return.loc[cur_date, 'IC'] = rank_corr
                factor_return.loc[cur_date, 'T值'] = t_value
                factor_return.loc[cur_date, '自相关系数'] = auto_corr
                factor_return.loc[cur_date, 'R2'] = r2
            else:
                print("Risk Factor is Null %s %s %s" % (factor_name, stock_pool_name, cur_date))

        factor_return = factor_return.dropna(subset=['因子收益率', 'T值'])

        if len(factor_return) == 0:
            print("Risk Factor %s is Null in %s" % (factor_name, stock_pool_name))
            return

        factor_return['因子累计收益率'] = factor_return['因子收益率'].cumsum()

        # Annualise with the number of periods per year.
        factor_return_mean = factor_return['因子收益率'].mean() * num
        factor_return_std = factor_return['因子收益率'].std() * np.sqrt(num)

        rank_ic_mean = factor_return['IC'].mean()
        rank_ic_ir = rank_ic_mean / factor_return['IC'].std() * np.sqrt(num)

        abs_t_2_ratio = len(factor_return[factor_return['T值'].abs() > 2]) / len(factor_return)
        data_beg_date = factor_return.index[0]
        data_end_date = factor_return.index[-1]
        abs_t_mean = factor_return['T值'].abs().mean()
        auto_corr_mean = factor_return['自相关系数'].mean()
        r2_mean = factor_return['R2'].mean()

        summary = pd.DataFrame([], columns=['因子表现'])
        summary.loc['因子年化收益率', "因子表现"] = factor_return_mean
        summary.loc['因子年化波动率', "因子表现"] = factor_return_std
        summary.loc['IC均值', "因子表现"] = rank_ic_mean
        summary.loc['ICIR', "因子表现"] = rank_ic_ir
        summary.loc['平均R2', "因子表现"] = r2_mean
        summary.loc['T值绝对值大于2的比率', "因子表现"] = abs_t_2_ratio
        summary.loc['T值绝对值平均数', "因子表现"] = abs_t_mean
        summary.loc['自相关系数平均', "因子表现"] = auto_corr_mean
        summary.loc['期数', "因子表现"] = str(len(factor_return))
        summary.loc['开始日期', "因子表现"] = data_beg_date
        summary.loc['结束日期', "因子表现"] = data_end_date

        path = os.path.join(self.factor_performance_path, stock_pool_name)
        if not os.path.exists(path):
            os.makedirs(path)
        file = os.path.join(path, 'Summary_%s.xlsx' % factor_name)

        excel = WriteExcel(file)
        num_format_pd = pd.DataFrame([], columns=summary.columns, index=['format'])
        num_format_pd.loc['format', :] = '0.00%'

        # Summary table first, per-period table a few columns to its right.
        worksheet = excel.add_worksheet(factor_name)
        excel.write_pandas(summary, worksheet, begin_row_number=0, begin_col_number=1,
                           num_format_pd=num_format_pd, color="red", fillna=True)

        num_format_pd = pd.DataFrame([], columns=factor_return.columns, index=['format'])
        num_format_pd.loc['format', :] = '0.00%'
        excel.write_pandas(factor_return, worksheet, begin_row_number=0, begin_col_number=4,
                           num_format_pd=num_format_pd, color="red", fillna=True)
        excel.close()
コード例 #20
0
def TestFundAlphaFactor(name, periods="Q"):
    """
    Back-test one fund alpha factor with per-period cross-sectional
    regressions and a ten-group sort.

    For each pair of consecutive back-test dates the factor value of the
    first date is compared with the NAV change up to the second date: OLS
    slope ('AlphaFactor'), Pearson IC, rank IC, mean return of every group,
    excess return of group 1 and group 10 over the group average, and the
    correlation between this and the next period's factor values
    ('AutoCorr'). Rows with any missing value are dropped, a cumulative sum
    is added for every column, and the table is written to
    '<name>BackTestReturn_<periods>.csv' in the 'alpha_factor_test_result'
    folder of the hard-coded data path.

    :param name: factor name; '<name>.csv' is read from the 'exposure' folder
    :param periods: period code handed to Date().get_trade_date_series
    :return: None
    """

    # Parameters
    ###########################################################################################
    # name = "FundHolderQuarter_AlphaReturnMean_480"
    # periods = "Q"
    path = r'E:\3_Data\4_fund_data\2_fund_factor'
    group_number = 10

    # Fund NAV data
    ###########################################################################################
    fund_nav = Fund().get_fund_factor("Repair_Nav", None, None).T

    # Factor values of the fund alpha factor
    ###########################################################################################
    file = os.path.join(path, "exposure", name + '.csv')
    values = pd.read_csv(file, index_col=[0], encoding='gbk')
    values.columns = values.columns.map(str)
    values.columns = values.columns.map(lambda x: Date().get_trade_date_offset(x, 0))
    values = FactorPreProcess().remove_extreme_value_mad(values)
    values = FactorPreProcess().standardization(values)
    label = ["Group_" + str(x) for x in range(1, 1 + group_number)]
    result = pd.DataFrame([], index=values.columns, columns=label)

    # Back-test dates
    ###########################################################################################
    backtest_date_series = Date().get_trade_date_series("20040101", datetime.today(), periods)
    values_date_series = [str(x) for x in values.columns]
    nav_date_series = [str(x) for x in fund_nav.columns]

    date_series = sorted(set(backtest_date_series) & set(values_date_series) & set(nav_date_series))

    # Cross-sectional regression and grouping for every period
    ###########################################################################################
    for date, next_date in zip(date_series[:-1], date_series[1:]):

        val = pd.DataFrame(values[date])
        val_next = pd.DataFrame(values[next_date])
        pct = pd.DataFrame(fund_nav[next_date] / fund_nav[date] - 1.0)

        # Merge factor value and next-period return
        ###########################################################################################
        data = pd.concat([val, pct], axis=1)
        data = data.loc[~data.index.duplicated(), :]
        data.columns = ['val', 'pct']
        data = data.dropna()
        data['rank_val'] = data['val'].rank()
        data['rank_pct'] = data['pct'].rank()

        if len(data) <= 10:
            # Too few funds for a meaningful cross-section.
            continue

        try:
            print(name, date)
            x = data['val']
            y = data['pct']

            # Regression slope (factor return) and information coefficients
            ###########################################################################################
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            # params is a label-indexed Series here ('const', 'val'); take the
            # slope by position explicitly.
            beta = model.params.iloc[1]
            result.loc[date, 'AlphaFactor'] = beta
            result.loc[date, "IC"] = data['val'].corr(data['pct'])
            result.loc[date, "RankIC"] = data["rank_val"].corr(data["rank_pct"])

            # Sort by factor value and average the return of every group
            ###########################################################################################
            data = data.sort_values(by=['val'], ascending=False)
            data['group'] = pd.qcut(data['val'], group_number, labels=label)
            gb = data.groupby(by=["group"])["pct"].mean()
            result.loc[date, label] = gb
            mean = gb.mean()
            result.loc[date, "G1ExcessReturn"] = gb.loc["Group_1"] - mean
            result.loc[date, "G10ExcessReturn"] = gb.loc["Group_10"] - mean
            corr_data = pd.concat([val, val_next], axis=1)
            result.loc[date, "AutoCorr"] = corr_data.corr().iloc[0, 1]
        except Exception as e:
            # Best effort: one bad period must not stop the back-test, but the
            # failure is reported instead of being silently discarded. The
            # incomplete row is removed by the dropna below.
            print("TestFundAlphaFactor failed for %s at %s: %r" % (name, date, e))

    ###########################################################################################
    result = result.dropna()

    # Cumulative values
    ###########################################################################################
    # Snapshot the column list first because the loop adds new columns.
    for col in list(result.columns):
        result['Cum' + col] = result[col].cumsum()

    # Save
    ###########################################################################################
    file = name + "BackTestReturn_" + str(periods) + ".csv"
    file = os.path.join(path, "alpha_factor_test_result", file)
    result.to_csv(file)
コード例 #21
0
ファイル: fund_rank.py プロジェクト: easy00000000/quant
    def rank_fund(self,
                  fund_code,
                  rank_pool,
                  beg_date,
                  end_date,
                  new_fund_date=None,
                  excess=False):
        """
        Rank one fund inside a fund pool over [beg_date, end_date].

        Three ranking modes:
        1. rank_pool == 'wind': take rank and rank percentage straight from
           the Wind interface (Wind level-3 classification).
        2. custom pool, excess=False: rank by absolute return
           ("NAV_adj_return") fetched from Wind.
        3. custom pool, excess=True: rank by excess return over benchmark
           ("NAV_over_bench_return_per") fetched from Wind.

        For a custom pool the ranked table is also saved as
        '<fund_code>_<rank_pool>_<beg_date>_<end_date>.csv' in self.data_path.

        :param fund_code: code of the fund to look up
        :param rank_pool: 'wind' or the name of a pool known to FundPool
        :param beg_date: start of the return window
        :param end_date: end of the return window
        :param new_fund_date: only funds whose "setupdate" is not later than
            this date are ranked; defaults to beg_date
        :param excess: rank on excess return instead of absolute return
        :return: (val, pct) where val is the rank text (e.g. "3/120") and pct
            the rank percentage rounded to 3 decimals; either may be the
            string "None" when it cannot be determined.
        """

        if new_fund_date is None:
            new_fund_date = beg_date

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)
        new_fund_date = Date().change_to_str(new_fund_date)

        print(" 正在计算基金排名 %s 在基金池 %s 从 %s 到 %s " %
              (fund_code, rank_pool, beg_date, end_date))

        if rank_pool == 'wind':

            # Wind level-3 classification
            date_str = "startDate=%s;endDate=%s;fundType=3" % (beg_date,
                                                               end_date)
            data = w.wss(fund_code, "peer_fund_return_rank_per", date_str)
            val = str(data.Data[0][0])
            data = w.wss(fund_code, "peer_fund_return_rank_prop_per", date_str)

            try:
                pct = np.round(data.Data[0][0] / 100.0, 3)
            except Exception as e:
                print(e)
                print("wind返回基金排名百分比非数字")
                pct = "None"
            return val, pct

        # Custom pool: keep class-A funds that are neither feeder funds,
        # Hong Kong funds nor ETFs.
        pool = FundPool().get_fund_pool_all(date="20181231",
                                            name=rank_pool)
        bool_series = (pool['if_connect'] == '非联接基金') & (pool['if_hk']
                                                         == '非港股基金')
        bool_series &= (pool['if_a'] == 'A类基金')
        bool_series &= (pool['if_etf'] == '非ETF基金')
        pool = pool[bool_series]
        fund_code_str = ','.join(pool.index.values)

        # Both modes share one code path; only the Wind field differs. The
        # excess-return branch used to be a copy that lacked the duplicate-code
        # removal, which is now applied in both modes.
        field = "NAV_over_bench_return_per" if excess else "NAV_adj_return"
        date_str = "startDate=%s;endDate=%s" % (beg_date, end_date)
        data = w.wss(fund_code_str, field, date_str)
        data = pd.DataFrame(data.Data, columns=data.Codes, index=[field]).T
        data = data[~data.index.duplicated()]
        data = pd.concat([data, pool], axis=1)
        data = data[data["setupdate"] <= new_fund_date]
        data = data.dropna(subset=[field])
        data = data.sort_values(by=field, ascending=False)

        # Position 0 is the best return; rank text is "<position + 1>/<total>".
        total = len(data)
        data['range'] = range(total)
        data["rank"] = data['range'].map(
            lambda x: str(x + 1) + "/" + str(total))
        data['rank_pct'] = data['range'].map(lambda x: (x + 1) / total)

        try:
            val = data.loc[fund_code, "rank"]
            pct = data.loc[fund_code, "rank_pct"]
            pct = np.round(pct, 3)
            file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date,
                                        end_date)
            file = os.path.join(self.data_path, file)
            data.to_csv(file)
        except Exception as e:
            # Deliberate best effort (e.g. the fund is not part of the ranked
            # pool): report and fall back to "None".
            print(e)
            val = "None"
            pct = "None"
        return val, pct
コード例 #22
0
ファイル: EMA5.py プロジェクト: rlcjj/quant
def EMA5(beg_date, end_date):
    """
    Compute the "EMA5" factor: a weighted average of the last 5 adjusted
    closing prices in which more recent days weigh more.

    Despite the name the weights form an arithmetic sequence
    (1, 2, ..., 5 normalised to sum to one), not the geometric sequence of a
    true exponential moving average.

    :param beg_date: start of the date range passed to
        Date().get_trade_date_series
    :param end_date: end of that date range
    :return: DataFrame with one column per date that produced at least one
        value, indexed like the price table (presumably stock codes). The same
        frame is written with Stock().write_factor_h5 under "alpha_dfc". When
        the range contains no usable date an empty DataFrame is returned and
        nothing is written.
    """

    # param
    #################################################################################
    LongTerm = 5
    factor_name = "EMA5"

    # read data
    #################################################################################
    close = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(date_series) & set(close.columns))

    if not date_series:
        # Previously this case crashed on an unbound `res`.
        print('Calculating factor %s has no date between %s and %s' %
              (factor_name, beg_date, end_date))
        return pd.DataFrame()

    # The weights do not depend on the date, so build them once. The
    # intermediate 2 / (1 + LongTerm) scaling cancels in the normalisation;
    # the arithmetic is kept as it was so results stay bit-for-bit the same.
    weight = np.array(list(range(1, LongTerm + 1)))
    weight = weight * 2 / (1 + LongTerm)
    weight = weight / weight.sum()

    # One single-column frame per date, concatenated once after the loop.
    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(LongTerm - 1))
        price_before = close.loc[:, data_beg_date:current_date]
        # Transposed so rows are dates; drop dates without any price.
        price_stock = price_before.T.dropna(how='all')

        if len(price_stock) == LongTerm:
            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))
            price_mean = pd.DataFrame(np.dot(weight, price_stock.values),
                                      columns=[current_date],
                                      index=price_stock.columns)
        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))
            # All-NaN placeholder; removed again by the dropna below.
            price_mean = pd.DataFrame([],
                                      columns=[current_date],
                                      index=price_stock.columns)

        daily_frames.append(price_mean)

    res = pd.concat(daily_frames, axis=1)
    res = res.dropna(axis=1, how='all')

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #23
0
    def cal_fund_regression_exposure_style(self,
                                           fund,
                                           beg_date,
                                           end_date,
                                           period="D"):
        """
        Estimate the Barra style (and country) exposures of a fund or index
        by constrained least squares on its returns.

        For every date in [beg_date, end_date] the returns of the last
        self.regression_period trade dates are regressed on the Barra factor
        returns as a quadratic program: every coefficient is bounded in
        absolute value by 1.25, except the last one, which is bounded above by
        0.95 and below by max(last quarter's stock ratio - 0.08, 0.75). The
        intercept is estimated but not stored. Results are merged into
        'Fund_Regression_Exposure_Style_<fund>.csv' under
        self.data_path_exposure/fund_regression_exposure_style.

        :param fund: fund code (ending in 'OF') or index code
        :param beg_date: first date to compute
        :param end_date: last date to compute
        :param period: sampling period handed to Date().get_trade_date_series
        :return: None; when there is no date to compute nothing is written.
        """

        # Parameters
        ####################################################################
        up_style_exposure = 1.25
        up_position_exposure = 0.95
        default_low_position_exposure = 0.75
        position_sub = 0.08

        beg_date = Date().change_to_str(beg_date)
        end_date = Date().change_to_str(end_date)

        # Data: factor returns and fund (or index) returns
        ####################################################################
        type_list = ['STYLE', 'COUNTRY']

        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        barra_return = Barra().get_factor_return(None, None, type_list)

        date_series = Date().get_trade_date_series(beg_date,
                                                   end_date,
                                                   period=period)

        if fund[len(fund) - 2:] == 'OF':
            fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None,
                                                       [fund])
        else:
            fund_return = Index().get_index_factor(fund, attr=["PCT"]) * 100
            fund_return.columns = [fund]

        data = pd.concat([fund_return, barra_return], axis=1)
        data = data.dropna()
        print(" Fund Code Total Len %s " % len(data))
        factor_number = len(barra_name)
        stock_ratio = FundFactor().get_fund_factor("Stock_Ratio", None,
                                                   [fund]) / 100

        date_series = sorted(set(date_series) & set(data.index))

        if not date_series:
            # Previously this case crashed on an unbound `params_new`.
            print(" Fund Code %s has no date to calculate " % fund)
            return

        # The inequality matrix does not depend on the date:
        # [I; -I] x <= h  bounds every variable from above and from below.
        G = matrix(np.vstack((np.diag(np.ones(factor_number + 1)),
                              -np.diag(np.ones(factor_number + 1)))))

        # Regress date by date; each date uses the data up to that date
        ####################################################################
        params_list = []

        for period_end_date in date_series:

            # Data needed for the regression
            ####################################################################
            period_beg_date = Date().get_trade_date_offset(
                period_end_date, -self.regression_period)
            data_end_date = Date().get_trade_date_offset(period_end_date, -0)

            period_date_series = Date().get_trade_date_series(
                period_beg_date, data_end_date)
            # reindex keeps the old `.ix[list]` behaviour: dates missing from
            # `data` become NaN rows and are removed by dropna.
            data_periods = data.reindex(period_date_series).dropna()

            # Stock position reported for the previous quarter
            #####################################################################################
            quarter_date = Date().get_last_fund_quarter_date(period_end_date)
            stock_ratio_fund = stock_ratio.loc[quarter_date, fund]
            print(
                "########## Calculate Regression Exposure %s %s %s %s %s %s ##########"
                % (fund, period_beg_date, period_end_date, quarter_date,
                   len(data_periods), stock_ratio_fund))

            # Empty (all-NaN) row, used whenever the date cannot be estimated.
            params_add = pd.DataFrame([],
                                      columns=[period_end_date],
                                      index=barra_name).T

            if len(data_periods) > self.regression_period_min:

                y = data_periods.iloc[:, 0].values
                x = data_periods.iloc[:, 1:].values
                x_add = sm.add_constant(x)

                # Lower position bound for THIS date only. It used to
                # overwrite the shared default, so once a high stock ratio had
                # raised the bound it stayed raised for all later dates.
                low_position_exposure = max(stock_ratio_fund - position_sub,
                                            default_low_position_exposure)
                if np.isnan(low_position_exposure):
                    low_position_exposure = default_low_position_exposure

                # Least squares |y - Xb|^2 in the form 1/2 b'Pb + Q'b.
                P = 2 * np.dot(x_add.T, x_add)
                Q = -2 * np.dot(x_add.T, y)

                h_up = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                  np.array([up_position_exposure])))
                h_low = np.vstack((np.ones(
                    (factor_number, 1)) * up_style_exposure,
                                   np.array([-low_position_exposure])))
                h = np.vstack((h_up, h_low))

                try:
                    result = sol.qp(matrix(P), matrix(Q), G, matrix(h))
                    # Element 0 is the intercept and is not stored.
                    params_add = pd.DataFrame(np.array(result['x'][1:]),
                                              columns=[period_end_date],
                                              index=barra_name).T
                except Exception as e:
                    # Best effort: keep the empty row, but report why the
                    # solver failed.
                    print(e)

            print(params_add)
            params_list.append(params_add)

        params_new = pd.concat(params_list, axis=0)

        # Merge with previously saved results
        ####################################################################
        out_path = os.path.join(self.data_path_exposure,
                                'fund_regression_exposure_style')
        out_file = os.path.join(
            out_path, 'Fund_Regression_Exposure_Style_' + fund + '.csv')

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new
        print(params)
        params.to_csv(out_file)
コード例 #24
0
    def cal_style_position(self, beg_date, end_date, code):
        """
        Estimate the style weights and the stock/bond position of a fund or
        index with a constrained least-squares regression.

        For every date the returns of `code` are regressed on the returns of
        self.index_code_list over the window ending at that date. The weights
        are non-negative, sum to one, the weights after the first one sum to a
        value inside [self.stock_ratio_low, self.stock_ratio_up], and the
        total absolute change against the previous weights is capped by a
        turnover limit. On the first date, and after a date that failed, the
        previous weights are assumed equal and the turnover limit is opened up
        to 2.0.

        Dates that cannot be estimated (too little data or a failed solve)
        produce no row. Results are merged into
        'RestraintOLSStylePosition_<code>.csv' in self.data_path.

        :param beg_date: first date to compute
        :param end_date: last date to compute
        :param code: column of self.data_return to explain
        :return: None; when no date could be estimated nothing is written.
        """

        x_pct = self.data_return[self.index_code_list]
        x_pct = x_pct.dropna(how='all')
        y_pct = pd.DataFrame(self.data_return[code])
        y_pct = y_pct.dropna()

        all_date_series = Date().get_trade_date_series(beg_date,
                                                       end_date,
                                                       period="D")
        y_series = Date().get_trade_date_series(y_pct.index[0],
                                                y_pct.index[-1])
        date_series = sorted(set(y_series) & set(all_date_series))

        error = False
        old_weight = None
        params_list = []

        for i_date, ed_date in enumerate(date_series):

            bg_date = Date().get_trade_date_offset(ed_date,
                                                   -self.regress_length)
            last_date = Date().get_trade_date_offset(ed_date, -1)

            # Keep only the indices without gaps inside the window.
            x_pct_period = x_pct.loc[bg_date:ed_date, :]
            x_pct_period = x_pct_period.dropna(axis=1)
            x_columns = x_pct_period.columns
            data = pd.concat([y_pct, x_pct_period], axis=1)
            data = data.dropna()

            # First date, or the previous date failed: open up the turnover
            # limit and assume equal previous weights.
            if i_date == 0 or error:
                n = len(x_columns)
                old_weight = pd.DataFrame(n * [1.0 / n],
                                          index=x_columns,
                                          columns=[last_date])
                turnover_daily = 2.0
            else:
                turnover_daily = self.turnover_daily
                # reindex (not .loc): an index that was absent from the
                # previous solution starts from weight 0, and the summary rows
                # added to the previous result are dropped.
                old_weight = old_weight.reindex(x_columns).fillna(0.0)

            print("## Cal Regress %s %s %s %s TurnOver %s##" %
                  (code, bg_date, ed_date, data.shape, turnover_daily))

            if len(data) >= self.regress_length_min:
                y = data[code].values
                x = data.iloc[:, 1:].values
                k = x.shape[1]
                old = old_weight.T.values[0]

                try:
                    w = cvx.Variable(k)
                    sigma = y - x * w
                    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)), [
                        cvx.sum(w) == 1.0,
                        cvx.sum(w[1:]) >= self.stock_ratio_low,
                        cvx.sum(w[1:]) <= self.stock_ratio_up,
                        cvx.sum(cvx.abs(w - old)) <= turnover_daily, w >= 0
                    ])
                    prob.solve()

                    print('Solver Status : ', prob.status)
                    params_add = pd.DataFrame(w.value,
                                              columns=[ed_date],
                                              index=x_columns)
                    stock_sum = params_add.loc[self.index_code_list[1:],
                                               ed_date].sum()
                    concat_data = pd.concat([params_add, old_weight], axis=1)
                    concat_data = concat_data.dropna()
                    turnover_real = (concat_data[last_date] -
                                     concat_data[ed_date]).abs().sum()

                    params_add.loc['StockRatio', ed_date] = stock_sum
                    params_add.loc['BondRatio', ed_date] = params_add.loc[
                        self.index_code_list[0], ed_date]
                    params_add.loc['TurnOverDaily', ed_date] = turnover_real
                    print(params_add.T)

                    # Store the result only once the date fully succeeded.
                    # The old code appended `params_add` after every
                    # iteration, so a failed date re-added the previous
                    # date's column (or hit an undefined name on the first
                    # date).
                    params_list.append(params_add)
                    old_weight = params_add
                    error = False
                except Exception as e:
                    # Report the date being processed (the message used to
                    # print the end of the whole range) and the cause.
                    print(ed_date, code, "回归失败", e)
                    error = True
            else:
                print(ed_date, code, "数据长度不够")
                error = True

        if not params_list:
            print(code, "no date could be estimated, nothing saved")
            return

        # Merge with previously saved results
        ####################################################################
        params_new = pd.concat(params_list, axis=1).T
        out_file = os.path.join(self.data_path,
                                'RestraintOLSStylePosition_%s.csv' % code)

        if os.path.exists(out_file):
            params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
            params_old.index = params_old.index.map(str)
            params = FactorOperate().pandas_add_row(params_old, params_new)
        else:
            params = params_new

        params.to_csv(out_file)
コード例 #25
0
ファイル: BetaInverse.py プロジェクト: rlcjj/quant
def BetaInverse(beg_date, end_date):
    """
    Compute the "inverse beta" factor of every stock for each trade date.

    The roles of the stock return and the market return are swapped
    compared with the usual beta regression:

        Beta_inverse = Corr * BenchMark_Std / Stock_Std

    The market return is the cross-sectional mean of the stock returns
    inside the look-back window.

    :param beg_date: first date of the range to calculate
    :param end_date: last date of the range to calculate
    :return: DataFrame indexed like the ``Pct_chg`` factor, with one column
        per calculated date; dates whose column is entirely NaN are dropped.
        An empty DataFrame is returned (and nothing is written) when no
        trade date in the range is present in the return data.
    """

    # param
    #################################################################################
    LongTerm = 120      # look-back window length, in trade dates
    MinimumSize = 40    # minimum number of observations required
    factor_name = "BetaInverse"

    # read data
    #################################################################################
    # Columns are dates (they are intersected with the trade dates below);
    # the row index is reused as the index of the result.
    pct = Stock().get_factor_h5("Pct_chg", None, "primary_mfc")

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(pct.columns) & set(date_series))

    # Collect one single-column frame per date and concatenate once at the
    # end instead of growing the result inside the loop.
    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        # .loc is the label-based replacement for the removed .ix indexer.
        pct_before = pct.loc[:, data_beg_date:current_date]
        # After the transpose rows are dates; drop dates without any return.
        pct_stock = pct_before.T.dropna(how='all')

        if len(pct_stock) > MinimumSize:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            pct_average = pct_stock.mean(axis=1)
            corr_date = pct_stock.corrwith(pct_average)
            std_stock = pct_stock.std()
            std_market = pct_average.std()
            beta = corr_date * std_market / std_stock
            # Stocks with too few valid returns get no value.
            effective_number = pct_stock.count()
            beta[effective_number <= MinimumSize] = np.nan
            # Store the beta itself; the previous code stored the raw
            # correlation and silently discarded the computed beta.
            beta_date = pd.DataFrame(beta.values, columns=[current_date], index=beta.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            beta_date = pd.DataFrame([], columns=[current_date], index=pct.index)

        daily_frames.append(beta_date)

    if not daily_frames:
        # Nothing to calculate: avoid referencing an unbound result and do
        # not touch the stored factor.
        return pd.DataFrame(index=pct.index)

    res = pd.concat(daily_frames, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
コード例 #26
0
        buf_size = 1024
        fp = open(local_file, 'rb')
        self.ftp.storbinary('STOR ' + file_name, fp, buf_size)

    def upload_folder(self, ftp_folder):
        """
        Change the FTP working directory to ``ftp_folder``.

        If entering the folder fails for any reason, the folder is created
        and entered again. No files are transferred by the code shown here.

        :param ftp_folder: remote folder passed to ``self.ftp.cwd`` / ``mkd``
        """

        start_message = 'Begin UpLoading %s ......' % ftp_folder
        print(start_message)

        try:
            self.ftp.cwd(ftp_folder)
        except Exception:
            # Entering failed: create the folder, then enter it.
            self.ftp.mkd(ftp_folder)
            self.ftp.cwd(ftp_folder)


if __name__ == '__main__':

    # Script entry point: prepare today's dated folders and open the FTP
    # connection. These imports are only needed when run as a script.
    from quant.mfc.mfc_data import MfcData
    from datetime import datetime

    # Today's date converted by the project's Date helper; used below as the
    # name of the dated sub-folder.
    date = Date().change_to_str(datetime.today())

    # Dated sub-folder under the FTP root and under the local data root.
    # NOTE(review): ftp_path is not used within the lines shown here.
    ftp_path = os.path.join(MfcData().ftp_path, date)
    local_path = os.path.join(MfcData().data_path, date)

    # Create the local dated folder on first use.
    if not os.path.exists(local_path):
        os.mkdir(local_path)

    # MyFtp is defined outside this excerpt; connect() takes no arguments here.
    ftp = MyFtp()
    ftp.connect()
コード例 #27
0
    5、中证500基金优选500AlphaIR
    """

    # 更新基金Alpha
    # self = NiceStockFund()
    # self.update_data()

    # 1、主动股票型基金优选750AlphaIR
    fund_pool_name = "基金持仓基准基金池"
    benchmark_name = "基金持仓基准基金池"
    alpha_len = 750
    alpha_column = "RegressAlphaIR"
    port_name = "主动股票型基金优选750AlphaIR"
    beg_date = "20181201"
    end_date = "20190404"
    end_last_date = Date().get_trade_date_offset(end_date, -1)
    bench_code = "885000.WI"
    style_deviate = 0.20
    position_deviate = 0.02
    fund_up_ratio = 0.15
    turnover = 0.25

    self = NiceStockFund(fund_pool_name, benchmark_name, alpha_len, alpha_column, port_name,
                         style_deviate, position_deviate, fund_up_ratio, turnover)
    # self.cal_fund_factor_alldate(beg_date, end_date)
    # self.opt_alldate(beg_date, end_date, turnover_control=True)
    # self.upload_all_wind_port()
    # self.backtest(bench_code)

    # self.cal_fund_factor_date(end_date)
    # self.opt_date(end_date, end_last_date, turnover_control=True)
コード例 #28
0
ファイル: market.py プロジェクト: easy00000000/quant
    def get_all_data(self, beg_date, end_date):

        """
        Build, for each date of a monthly series, the total market value held
        by each holder category, and the same totals as ratios.

        Holder share data is restricted to the "S" date series ending at the
        last fund half-year date (original note: only annual and semi-annual
        report data are used here; take care to distinguish the Q2 and Q4
        reports).

        :param beg_date: start of the monthly output range
        :param end_date: end of the monthly output range
        :return: tuple ``(result, ratio, ratio_free)`` where ``result`` holds
            the summed market values per date, ``ratio`` is ``result`` divided
            by its '全A' column and ``ratio_free`` is ``result`` divided by its
            '流通' column
        """

        # (output column label, factor name) for each holder category, in the
        # order the columns appear in the output.
        holder_specs = [
            ('公募基金', "HolderTotalByFund"),                # mutual funds
            ('机构', "HolderTotalByInst"),                    # institutions
            ('一般法人', "HolderTotalByGeneralCorp"),         # general corporations
            ('私募', "HolderTotalByHF"),                      # private funds
            ('QFII', "HolderTotalByQFII"),                    # QFII
            ('社保', "HolderTotalBySocialSecurity"),          # social security
            ('保险', "HolderTotalByInsurance"),               # insurance
        ]
        holder_labels = [label for label, _ in holder_specs]
        holder_frames = [Stock().read_factor_h5(factor) for _, factor in holder_specs]

        # Keep only the report dates up to the last fund half-year date.
        halfyear_date = Date().get_last_fund_halfyear_date(end_date)
        report_start = Date().get_trade_date_offset(beg_date, -200)
        date_series = Date().get_normal_date_series(report_start, halfyear_date, "S")
        print(date_series)
        holder_frames = [frame[date_series] for frame in holder_frames]

        # Stock Connect holdings, forward-filled across at most 3 columns.
        share_hk = Stock().read_factor_h5("HK2CHoldShare") / 1000000
        share_hk = share_hk.T.fillna(method="pad", limit=3).T

        share_all = Stock().read_factor_h5("Share_TotalA") / 100000000      # total A shares
        price_unadjust = Stock().read_factor_h5("PriceCloseUnadjust")       # unadjusted close price
        share_free = Stock().read_factor_h5("Free_FloatShare") / 100000000  # free-float shares
        print(share_all.columns)

        value_columns = holder_labels + ['陆股通', '全A', "流通"]
        fund_dates = holder_frames[0].columns

        # Seeded with an empty frame so that an empty monthly series yields
        # an empty result, exactly as when rows are appended one by one.
        monthly_rows = [pd.DataFrame([])]

        for date in Date().get_normal_date_series(beg_date, end_date, "M"):

            # Latest available price date / report date not after `date`.
            price_date = price_unadjust.columns[price_unadjust.columns <= date][-1]
            share_date = fund_dates[fund_dates <= date][-1]
            print(date, price_date, share_date)

            # Make sure the Stock Connect frame has a column for this date.
            try:
                share_hk[price_date]
            except Exception:
                share_hk.loc[:, price_date] = np.nan

            pieces = [frame[share_date] for frame in holder_frames]
            pieces.extend([share_hk[price_date], share_all[price_date],
                           share_free[price_date], price_unadjust[price_date]])
            data = pd.concat(pieces, axis=1)
            data.columns = value_columns + ['价格']

            # shares * price per stock, then summed over all stocks
            data_mv = data[value_columns].mul(data['价格'], axis='index')
            totals = pd.DataFrame(data_mv.sum())
            totals.columns = [date]
            monthly_rows.append(totals.T)

        result = pd.concat(monthly_rows, axis=0)

        result["总和"] = result[['公募基金', 'QFII', '社保', '保险', '陆股通']].sum(axis=1)
        result["总和(剔除保险)"] = result[['公募基金', 'QFII', '社保', '陆股通']].sum(axis=1)

        ratio = result.div(result['全A'], axis='index')
        ratio_free = result.div(result['流通'], axis='index')

        return result, ratio, ratio_free
コード例 #29
0
    def generate_excel(self, end_date):

        """
        Write the inflow report for ``end_date`` to an Excel workbook.

        The sheet contains a table of 5-trade-day period inflows with the
        000300.SH close, a per-stock table and a per-industry table, plus two
        column charts. The workbook is saved under ``self.data_path/end_date``.

        NOTE(review): the original docstring describes Stock Connect
        information, while the file name, sheet name and chart titles refer
        to margin trading; confirm which one is intended.

        :param end_date: last date of the report; also the output sub-folder
        """

        def _number_formats(columns):
            # One-row frame of Excel number formats: two decimals everywhere,
            # percentage for the inflow-ratio column.
            formats = pd.DataFrame([], columns=columns, index=['format'])
            formats.loc['format', :] = '0.00'
            formats.loc['format', "净流入占比"] = '0.00%'
            return formats

        beg_date = Date().get_trade_date_offset(end_date, -60)
        beg_1m_date = Date().get_trade_date_offset(end_date, -20)
        # NOTE(review): the computed one-month start is overridden by a fixed
        # date; kept as in the original.
        beg_1m_date = "20190201"

        # Time series of net inflow over consecutive periods
        from quant.stock.index import Index
        index_data = Index().get_index_factor(index_code="000300.SH")
        date_series = Date().get_trade_date_series(beg_date, end_date, "W")

        # Walk backwards from end_date in 5-trade-day steps, one step per
        # interval of the weekly date series.
        period_frames = [pd.DataFrame([])]
        period_end = end_date
        for _ in range(len(date_series) - 1):
            period_beg = Date().get_trade_date_offset(period_end, -5)
            print("Hk Inflow Period %s %s" % (period_beg, period_end))
            period_frame = self.hk_inflow_period(period_beg, period_end)
            period_frame.loc[period_end, '沪深300'] = index_data.loc[period_end, "CLOSE"]
            period_frames.append(period_frame)
            period_end = period_beg

        result = pd.concat(period_frames, axis=0).sort_index()

        # Per-stock figures over the last month
        stock = self.hk_inflow_period_stock(beg_1m_date, end_date)

        # Per-industry figures over the last month
        industry = self.hk_inflow_period_industry(beg_1m_date, end_date)

        # Output location
        sub_path = os.path.join(self.data_path, end_date)
        if not os.path.exists(sub_path):
            os.makedirs(sub_path)

        # Write everything to the workbook
        filename = os.path.join(sub_path, '融资融券情况春节后.xlsx')
        print(filename)
        excel = WriteExcel(filename)
        sheet_name = "融资融券情况"
        worksheet = excel.add_worksheet(sheet_name)

        # Period table and its chart
        excel.write_pandas(result, worksheet, begin_row_number=0, begin_col_number=1,
                           num_format_pd=_number_formats(result.columns),
                           color="orange", fillna=True)
        excel.chart_columns_plot(worksheet, sheet_name=sheet_name,
                                 series_name=["净流入金额", '沪深300'],
                                 chart_name="最近3个月每周融资余额净买入金额(亿元)",
                                 insert_pos="I15", cat_beg="B2", cat_end="B13",
                                 val_beg_list=["F2", "H2"], val_end_list=["F13", "H13"])

        # Per-stock table
        excel.write_pandas(stock, worksheet, begin_row_number=0, begin_col_number=8,
                           num_format_pd=_number_formats(stock.columns),
                           color="orange", fillna=True)

        # Per-industry table and its chart
        excel.write_pandas(industry, worksheet, begin_row_number=0, begin_col_number=15,
                           num_format_pd=_number_formats(industry.columns),
                           color="orange", fillna=True)
        excel.chart_columns_plot(worksheet, sheet_name=sheet_name,
                                 series_name=["净流入占比", '净流入金额'],
                                 chart_name="行业最近1月融资净买入金额",
                                 insert_pos="I32", cat_beg="P2", cat_end="P30",
                                 val_beg_list=["U2", "S2"], val_end_list=["U30", "S30"])

        excel.close()
コード例 #30
0
    def cal_factor_barra_hsigma(self, beg_date, end_date):
        """
        Calculate the residual-volatility (hsigma) risk factor.

        For each date, the exponentially weighted standard deviation of the
        residual returns (the residuals of the stock-vs-market regression,
        already produced by the beta calculation) is computed over the
        look-back window and scaled by sqrt(250). That volatility is then
        regressed cross-sectionally on the Size and Beta exposures and the
        regression residual is kept as the raw factor value.

        The raw result is saved under ``self.raw_factor_name_hsigma``; after
        MAD-based extreme-value removal and standardization it is saved under
        ``self.factor_name_hsigma``. Nothing is saved when no value could be
        calculated.

        :param beg_date: first date of the range to calculate
        :param end_date: last date of the range to calculate
        """

        term = 252          # look-back window length, in trade dates
        half_life = 62      # half-life of the exponential weights
        min_periods = 20    # minimum observations for a valid std

        # Transposed so that rows are dates (the index is intersected with
        # the trade dates below).
        res_pct = self.get_risk_factor_exposure("cne5_raw_beta_res_pct").T
        size_data = self.get_risk_factor_exposure("cne5_normal_size")
        beta_data = self.get_risk_factor_exposure("cne5_normal_beta")

        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(
            set(res_pct.index) & set(date_series) & set(size_data.columns)
            & set(beta_data.columns))

        # One residual column per date; concatenated once after the loop.
        residual_columns = []

        for date in date_series:

            print('Calculating Barra Risk factor %s at date %s' %
                  (self.factor_name, date))
            data_beg_date = Date().get_trade_date_offset(date, -(term - 1))
            # .loc is the label-based replacement for the removed .ix indexer.
            data_pre = res_pct.loc[data_beg_date:date, :]
            data_pre = data_pre.dropna(how='all')

            if date not in data_pre.index:
                # The row of the current date was entirely NaN and has been
                # dropped, so there is no volatility to read for it.
                continue

            data_std = data_pre.ewm(halflife=half_life,
                                    min_periods=min_periods).std().loc[date, :]
            data_date = pd.DataFrame(data_std) * np.sqrt(250)
            data_date.columns = [date]

            regression_data = pd.concat(
                [size_data[date], beta_data[date], data_date], axis=1)
            regression_data.columns = ['size', 'beta', 'y']
            regression_data = regression_data.dropna()

            if len(regression_data) > 0:

                y = regression_data['y'].values
                x = regression_data[['size', 'beta']].values
                x_add = sm.add_constant(x)
                model = sm.OLS(y, x_add).fit()
                # Part of the volatility not explained by size and beta.
                residual = regression_data['y'] - model.fittedvalues
                residual_columns.append(residual.to_frame(date))

        if residual_columns:
            res = pd.concat(residual_columns, axis=1)
        else:
            res = pd.DataFrame([])

        res = res.T.dropna(how='all').T

        if len(res) != 0:
            self.save_risk_factor_exposure(res, self.raw_factor_name_hsigma)
            res = Stock().remove_extreme_value_mad(res)
            res = Stock().standardization(res)
            self.save_risk_factor_exposure(res, self.factor_name_hsigma)
        else:
            print("The Result Risk factor %s from date %s to %s" %
                  (self.factor_name, beg_date, end_date))