Esempio n. 1
0
def plot_save_fun(company, tmp_df, return_df, locked_df, hold_time_list, index_df, now_time, if_hedge=False):
    """Plot cumulative PnL per holding period, save the figure and mail it.

    For each entry of ``hold_time_list`` a position frame is built with
    ``deal_mix_factor`` and converted into a daily PnL series.  When
    ``if_hedge`` is true the PnL is taken net of ``index_df`` scaled by the
    summed daily position, using the ``'000300'`` column.  Only curves whose
    ``bt.AZ_Sharpe_y`` value is greater than 1 are drawn.

    The figure is always written to ``company.png`` in the figure directory
    (the file name is a fixed literal, not derived from ``company``); the
    e-mail is sent only when at least one curve was drawn.
    """
    figure_file = '/mnt/mfs/dat_whs/tmp_figure/' + 'company.png'
    plt.figure(figsize=[16, 8])
    curve_drawn = False

    for hold_time in hold_time_list:
        daily_pos = deal_mix_factor(tmp_df, None, locked_df, hold_time, 2, if_only_long=False)
        raw_pnl = (daily_pos * return_df).sum(axis=1)
        if if_hedge:
            index_leg = index_df.mul(daily_pos.sum(axis=1), axis=0)
            pnl_df = -index_leg.sub(raw_pnl, axis=0)['000300']
        else:
            pnl_df = raw_pnl

        asset_df = pnl_df.cumsum()
        sp_y = bt.AZ_Sharpe_y(pnl_df)
        pot = AZ_Pot(daily_pos, asset_df.values[-1])

        # Written as "not (>)" so that a NaN sharpe is skipped as well.
        if not (sp_y > 1):
            continue
        curve_drawn = True
        print(1)
        curve_label = 'hold_time={}, pot={}, sharpe={}'.format(hold_time, pot, sp_y)
        plt.plot(pnl_df.index, asset_df, label=curve_label)

    plt.legend()
    plt.title(company + '_' + now_time)
    plt.savefig(figure_file)
    plt.close()

    if not curve_drawn:
        return

    print('send_email')
    send_email.send_email('', ['*****@*****.**'], [figure_file], company)
Esempio n. 2
0
def create_pnl_file(all_use_factor, file_name, hold_time):
    """Re-run the backtest for the top three selected factor mixes and print the stats.

    Rows of ``all_use_factor`` with ``|pot_in| > 40`` are sorted ascending by
    ``sp_u``; for the first three, the three named factors are loaded, mixed
    with the function registered under ``fun_name``, turned into positions and
    passed through ``filter_all``.

    Args:
        all_use_factor: DataFrame whose first seven columns unpack as
            key, fun_name, name1, name2, name3, filter_fun_name, sector_name;
            the remaining columns are printed as ``result``.  Must contain
            ``pot_in`` and ``sp_u`` columns.
        file_name: sub-directory name created under ``<root_path>/tmp_pnl_file``.
        hold_time: forwarded to ``deal_mix_factor``.

    Note:
        ``root_path``, ``xnms``, ``xinx``, ``sector_df``, ``suspendday_df``,
        ``limit_buy_sell_df``, ``lag``, ``if_only_long``, ``cut_date``,
        ``return_choose`` and ``index_df`` are read from the enclosing scope.
        NOTE(review): despite the function name, the visible code never writes
        ``pnl_df``; ``save_file_name`` is built but unused.
    """
    pnl_root_path = os.path.join(root_path,
                                 'tmp_pnl_file/{}'.format(file_name))
    bt.AZ_Path_create(pnl_root_path)

    # Sort out of place: an in-place sort on a boolean-filtered frame triggers
    # pandas' SettingWithCopyWarning.
    use_factor_pot = all_use_factor[
        all_use_factor['pot_in'].abs() > 40].sort_values(by='sp_u')

    # The function registry does not depend on the row, so build it once.
    mix_fun_set = create_fun_set_2([mul_fun, sub_fun, add_fun])

    for i in use_factor_pot.index[:3]:
        # ``i`` is an index *label*, so it must be resolved with ``.loc``;
        # ``.iloc`` is positional and only coincides with the label on a
        # default RangeIndex.
        key, fun_name, name1, name2, name3, filter_fun_name, sector_name, *result = \
            all_use_factor.loc[i]
        print('**************************************')
        print('now {}\'s is running, key={}'.format(i, key))
        save_file_name = '|'.join([
            str(key), fun_name, name1, name2, name3, filter_fun_name,
            sector_name
        ]) + '.csv'
        fun = mix_fun_set[fun_name]

        factor_set = load_part_factor(sector_name, xnms, xinx,
                                      [name1, name2, name3])
        mix_factor = fun(factor_set[name1], factor_set[name2],
                         factor_set[name3])
        daily_pos = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                    limit_buy_sell_df, hold_time, lag,
                                    if_only_long)

        in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
        fit_ratio, leve_ratio, sharpe_q_out, pnl_df = filter_all(cut_date, daily_pos, return_choose, index_df,
                                                                 if_hedge=True, hedge_ratio=1, if_return_pnl=True,
                                                                 if_only_long=if_only_long)
        # Freshly computed statistics first, then the stored ones for comparison.
        print(in_condition, out_condition, ic, sharpe_q_in_df_u,
              sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, sharpe_q_out)
        print(result)
Esempio n. 3
0
def pos_sum(all_use_factor, hold_time):
    """Combine the ten strongest factor mixes into one position and backtest it.

    Steps performed:
      1. Sort ``all_use_factor`` by ``|sp_u|`` descending (in place, so the
         caller's frame is reordered) and drop every row that contains the
         value ``'volume_count_down_p60d'``.
      2. Keep rows with ``|pot_in| > 20`` and take the first ten.
      3. Pickle a config holding fun_name/name1/name2/name3/buy_sell for these
         rows to ``/mnt/mfs/alpha_whs/config01.pkl``.
      4. For each row rebuild the mixed factor; if more than 10% of its rows
         have a non-zero absolute sum, backtest it, plot it through
         ``plot_send_result`` and add it (sign-flipped when ``sp_m <= 0``) to
         the running factor sum.
      5. Turn the summed factor into positions and run ``filter_all`` on it.

    Args:
        all_use_factor: DataFrame with exactly the 17 columns unpacked in the
            loop below (key ... sp_out); mutated in place by the sort.
        hold_time: forwarded to ``deal_mix_factor``.

    Returns:
        ``(pnl_df, sum_pos_df)`` for the combined position.

    Note:
        ``xnms``, ``xinx``, ``sector_df``, ``suspendday_df``,
        ``limit_buy_sell_df``, ``lag``, ``if_only_long``, ``cut_date``,
        ``return_choose`` and ``index_df`` are read from the enclosing scope.
    """
    # Temporary helper column used only to order by absolute sp_u.
    all_use_factor['sort_line'] = all_use_factor['sp_u'].abs()
    all_use_factor.sort_values(by='sort_line', inplace=True, ascending=False)
    all_use_factor.drop(columns='sort_line', inplace=True)
    filter_cond = all_use_factor.apply(
        lambda x: 'volume_count_down_p60d' not in set(x), axis=1)
    all_use_factor = all_use_factor[filter_cond]

    a = all_use_factor[all_use_factor.pot_in.abs() > 20]
    a = a.iloc[:10]
    b = a.copy()
    # +1 when the stored sp_m is positive, otherwise -1.
    b['buy_sell'] = (a['sp_m'] > 0).astype(int).replace(0, -1)
    print(b['con_out'].sum() / len(a), len(a))
    factor_info = b[['fun_name', 'name1', 'name2', 'name3',
                     'buy_sell']].replace(0, -1)
    config = dict()
    config['factor_info'] = factor_info
    pd.to_pickle(config, '/mnt/mfs/alpha_whs/config01.pkl')

    # The function registry does not depend on the row, so build it once.
    mix_fun_set = create_fun_set_2([mul_fun, sub_fun, add_fun])

    sum_factor_df = pd.DataFrame(columns=xnms, index=xinx)
    for i in a.index:
        # Stored statistics carry an ``r_`` prefix so that the values freshly
        # returned by ``filter_all`` below do not overwrite them before the
        # comparison print.
        key, fun_name, name1, name2, name3, filter_fun_name, sector_name, r_con_in, r_con_out, r_ic, r_sp_u, \
        r_sp_m, r_sp_d, r_pot_in, r_fit_ratio, r_leve_ratio, r_sp_out = a.loc[i]

        print('***************************************************')
        print('now {}\'s is running, key={}, {}, {}, {}, {}'.format(
            i, key, fun_name, name1, name2, name3))
        fun = mix_fun_set[fun_name]

        factor_set = load_part_factor(sector_name, xnms, xinx,
                                      [name1, name2, name3])
        mix_factor = fun(factor_set[name1], factor_set[name2],
                         factor_set[name3])

        # Share of dates on which the mixed factor has any non-zero value.
        active_ratio = len(mix_factor.abs().sum(axis=1).replace(
            0, np.nan).dropna()) / len(mix_factor)
        if active_ratio > 0.1:
            pos_daily = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                        limit_buy_sell_df, hold_time, lag,
                                        if_only_long)
            in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
            fit_ratio, leve_ratio, sharpe_q_out, pnl_df = filter_all(cut_date, pos_daily, return_choose, index_df,
                                                                     if_hedge=True, hedge_ratio=1, if_return_pnl=True,
                                                                     if_only_long=if_only_long)
            plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df),
                             '{}, key={}'.format(i, key))
            # Stored statistics, then the recomputed ones, for comparison.
            print(r_con_in, r_con_out, r_ic, r_sp_u, r_sp_m, r_sp_d, r_pot_in,
                  r_fit_ratio, r_leve_ratio, r_sp_out)
            print(in_condition, out_condition, ic, sharpe_q_in_df_u,
                  sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, fit_ratio,
                  leve_ratio, sharpe_q_out)
            if r_sp_m > 0:
                sum_factor_df = sum_factor_df.add(mix_factor, fill_value=0)
            else:
                sum_factor_df = sum_factor_df.add(-mix_factor, fill_value=0)
        else:
            print('pos not enough!')

    sum_pos_df = deal_mix_factor(sum_factor_df, sector_df, suspendday_df,
                                 limit_buy_sell_df, hold_time, lag,
                                 if_only_long).round(14)
    in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
    fit_ratio, leve_ratio, sharpe_q_out, pnl_df = filter_all(cut_date, sum_pos_df, return_choose, index_df,
                                                             if_hedge=True, hedge_ratio=1, if_return_pnl=True,
                                                             if_only_long=if_only_long)
    print(in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m,
          sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sharpe_q_out)
    return pnl_df, sum_pos_df
Esempio n. 4
0
def para_result(begin_date,
                end_date,
                sector_df,
                locked_df,
                index_df,
                hold_time,
                i,
                fun_name,
                name_1,
                name_2,
                name_3,
                sector_name,
                result,
                if_only_long=False):
    """Backtest one factor mix, draw a 2x2 summary figure, save it and mail it.

    Args:
        begin_date, end_date: forwarded to ``load_part_factor`` and ``load_pct``.
        sector_df: its columns/index define the universe passed to the loaders;
            also forwarded to ``deal_mix_factor``.
        locked_df: forwarded to ``deal_mix_factor``.
        index_df: index returns used for the hedged PnL line and ``filter_all``.
        hold_time: forwarded to ``deal_mix_factor``.
        i: identifier shown in the figure super-title.
        fun_name: key into the registry returned by ``create_fun_set_2``.
        name_1, name_2, name_3: names of the three factors to mix.
        sector_name: forwarded to ``load_part_factor``.
        result: sequence of exactly ten stored statistics
            (con_in, con_out, ic, sp_u, sp_m, sp_d, pot_in, fit_ratio,
            leve_ratio, sharpe_out); all but the first two go into the title
            of the second subplot.
        if_only_long: forwarded to ``deal_mix_factor``.

    Side effects:
        Writes ``<fun_name>|<name_1>|<name_2>|<name_3>.png`` under
        ``/mnt/mfs/dat_whs/result/tmp`` and sends it via ``send_email``.

    Note:
        ``cut_date`` is read from the enclosing scope.
        NOTE(review): ``filter_all`` is called with ``if_only_long=False``
        regardless of the ``if_only_long`` argument - confirm this is intended.
    """
    cost_1 = 0.001
    lag = 1
    xnms, xinx = sector_df.columns, sector_df.index
    r_con_in, r_con_out, r_ic, r_sp_u, r_sp_m, r_sp_d, r_pot_in, r_fit_ratio, r_leve_ratio, r_sharpe_out = result
    figure_save_path = '/mnt/mfs/dat_whs/result/tmp'
    fun_set = [mul_fun, sub_fun, add_fun]
    mix_fun_set = create_fun_set_2(fun_set)
    fun = mix_fun_set[fun_name]

    factor_set = load_part_factor(sector_name, begin_date, end_date, xnms,
                                  xinx, [name_1, name_2, name_3])
    choose_1 = factor_set[name_1]
    choose_2 = factor_set[name_2]
    choose_3 = factor_set[name_3]

    return_data = load_pct(begin_date, end_date, xnms, xinx)
    mix_factor = fun(choose_1, choose_2, choose_3)
    pos_df_daily = deal_mix_factor(mix_factor, sector_df, locked_df, hold_time,
                                   lag, if_only_long)

    in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \
    fit_ratio, leve_ratio, sharpe_q_out, pnl_df = filter_all(cut_date, pos_df_daily, return_data, index_df,
                                                             if_hedge=False, hedge_ratio=1, if_return_pnl=True,
                                                             if_only_long=False)

    # Hedged PnL: raw PnL minus the index return scaled by the net daily
    # position; only the first index column is kept.
    hedge_df = 1 * index_df.mul(pos_df_daily.sum(axis=1), axis=0)
    pnl_df_h = -hedge_df.sub((pos_df_daily * return_data).sum(axis=1), axis=0)
    pnl_df_h = pnl_df_h[pnl_df_h.columns[0]]

    fig = plt.figure(figsize=(16, 12))
    fig.suptitle('{} factor figure'.format(i), fontsize=40)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)

    asset = pnl_df.cumsum()
    asset_hedge = pnl_df_h.cumsum()

    pot = bt.AZ_Pot(pos_df_daily, asset.iloc[-1])
    sharpe = bt.AZ_Sharpe_y(pnl_df)

    pot_h = bt.AZ_Pot(pos_df_daily, asset_hedge.iloc[-1])
    sharpe_h = bt.AZ_Sharpe_y(pnl_df_h)

    # Flip both curves when the unhedged curve ends below zero, so the raw
    # line is always drawn ending on the positive side.
    if asset.values[-1] < 0:
        asset = -asset
        asset_hedge = -asset_hedge

    # Cumulative trading cost: absolute daily position change times cost_1.
    cost_matrix_1 = (pos_df_daily.diff().abs() * cost_1).sum(axis=1).cumsum()

    ax1.plot(pnl_df.index,
             asset,
             label='raw_line, pot={}, sharpe={}'.format(pot, sharpe))
    ax1.plot(pnl_df.index,
             asset - cost_matrix_1,
             label='cost_line, pot={}, sharpe={}'.format(pot, sharpe))
    ax1.plot(pnl_df.index,
             asset_hedge,
             label='helge_line, pot_h={}, sharpe_h={}'.format(pot_h, sharpe_h))

    ax1.set_title('{}, {}, {}, {}'.format(fun_name, name_1, name_2, name_3))
    ax1.grid(1)
    ax1.legend()

    rolling_sharpe, cut_sharpe = AZ_Rolling_sharpe(pnl_df,
                                                   roll_year=3,
                                                   year_len=250)
    ax2.hist(rolling_sharpe.values, bins=200)
    ax2.set_title('{},{},{},{},{},{},{},{}'.format(round(r_ic, 4), r_sp_u,
                                                   r_sp_m, r_sp_d, r_pot_in,
                                                   round(r_fit_ratio, 4),
                                                   round(r_leve_ratio,
                                                         4), r_sharpe_out))
    ax2.grid(axis='y')

    result_filter = filter_pot_sharpe(cut_date,
                                      fun(choose_1, choose_2, choose_3),
                                      return_data,
                                      index_df,
                                      lag=1,
                                      hold_time=5,
                                      if_hedge=False,
                                      hedge_ratio=1)
    ax3.set_title(','.join([str(x) for x in result_filter]))
    ax4.set_title(
        'pot_in={}, leve_ratio={}, sharpe_q_in_df_m={}, fit_ratio={}'.format(
            pot_in, round(leve_ratio, 4), sharpe_q_in_df_m,
            round(fit_ratio, 4)))

    ax3.plot(bt.AZ_Rolling(pos_df_daily.sum(axis=1), 250).mean())
    ax3.legend()

    # Build the output name once; it is used for the file and the mail subject.
    subject = '|'.join([fun_name, name_1, name_2, name_3])
    figure_file = os.path.join(figure_save_path, subject) + '.png'
    fig.savefig(figure_file)
    # Release the figure: pyplot keeps every figure alive until it is closed,
    # so repeated calls would otherwise accumulate open figures.
    plt.close(fig)

    text = '.'
    to = ['*****@*****.**']
    filepath = [figure_file]
    send_email.send_email(text, to, filepath, subject)
Esempio n. 5
0
def part_test_index_3(time_para_dict, sector_name, key, name_1, name_2, name_3,
                      sector_df, suspendday_df, limit_buy_sell_df,
                      return_choose, index_df, cut_date, log_save_file,
                      result_save_file, if_save, if_hedge, hold_time,
                      if_only_long, xnms, xinx, total_para_num):
    """Test every mixing function on one factor triple and record the passing results.

    The three factors are loaded once; for each function produced by
    ``create_fun_set_2`` the mixed factor is skipped when fewer than 10% of its
    rows have a non-zero absolute sum, otherwise it is converted to positions
    and evaluated with ``filter_time_para_fun``.  For every time key whose
    first result element (``in_condition``) is truthy, the result is printed
    and, when ``if_save`` is true, appended as a ``|``-separated line to
    ``result_save_file``.  Finally one timing line is appended to
    ``log_save_file`` (when ``if_save``) and a progress line is printed.

    Args:
        time_para_dict: forwarded to ``filter_time_para_fun``.
        sector_name, xnms, xinx: forwarded to ``load_part_factor``.
        key: identifier of this parameter combination; also used with
            ``total_para_num`` to print progress as a percentage.
        name_1, name_2, name_3: names of the factors to mix.
        sector_df, suspendday_df, limit_buy_sell_df, hold_time, if_only_long:
            forwarded to ``deal_mix_factor``.
        return_choose, index_df, if_hedge: forwarded to ``filter_time_para_fun``.
        cut_date: accepted but not used in this function.
        log_save_file, result_save_file: paths appended to when ``if_save``.
        total_para_num: total number of combinations (progress denominator).
    """
    # NOTE(review): this lock is created per call, so it cannot serialise
    # writes against other calls of this function - confirm whether a shared
    # lock was intended.
    lock = Lock()
    lag = 2
    start_time = time.time()
    load_time_1 = time.time()
    # Load the factors, filtered to the stock universe (xnms / xinx).
    factor_set = load_part_factor(sector_name, xnms, xinx,
                                  [name_1, name_2, name_3])
    load_time_2 = time.time()
    # Time spent loading the data.
    load_delta = round(load_time_2 - load_time_1, 2)
    # Build the set of mixing functions.
    fun_set = [sub_fun, add_fun, mul_fun]
    fun_mix_2_set = create_fun_set_2(fun_set)
    # Swap the filter function here if a different one is wanted; only its
    # name is written to the output files.
    filter_fun = filter_all
    filter_name = filter_fun.__name__

    for fun in fun_mix_2_set:
        mix_factor = fun(factor_set[name_1], factor_set[name_2],
                         factor_set[name_3])
        # Skip mixes that are non-zero on fewer than 10% of the dates.
        if len(mix_factor.abs().sum(axis=1).replace(
                0, np.nan).dropna()) / len(mix_factor) < 0.1:
            continue

        daily_pos = deal_mix_factor(mix_factor, sector_df, suspendday_df,
                                    limit_buy_sell_df, hold_time, lag,
                                    if_only_long)
        # Filter results for each time parameter.
        result_dict = filter_time_para_fun(time_para_dict,
                                           daily_pos,
                                           return_choose,
                                           index_df,
                                           if_hedge=if_hedge,
                                           hedge_ratio=1,
                                           if_return_pnl=False,
                                           if_only_long=if_only_long)
        for time_key, time_result in result_dict.items():
            in_condition, *filter_result = time_result
            if not in_condition:
                continue
            # Store the result.
            if if_save:
                write_list = [
                    time_key, key, fun.__name__, name_1, name_2,
                    name_3, filter_name, sector_name, in_condition
                ] + filter_result
                # The context manager closes (and flushes) the file; the
                # handle was previously left open.
                with lock, open(result_save_file, 'a') as f:
                    f.write('|'.join([str(x) for x in write_list]) + '\n')
            print([
                time_key, in_condition, fun.__name__, name_1, name_2,
                name_3
            ] + filter_result)
    end_time = time.time()
    # Store the run parameters and timings.
    if if_save:
        write_list = [
            key, name_1, name_2, name_3, filter_name, sector_name,
            round(end_time - start_time, 4), load_delta
        ]
        with lock, open(log_save_file, 'a') as f:
            f.write('|'.join([str(x) for x in write_list]) + '\n')

    print('{}%, {}, {}, {}, {}, cost {} seconds, load_cost {} seconds'.format(
        round(key / total_para_num * 100, 4), key, name_1, name_2, name_3,
        round(end_time - start_time, 2), load_delta))
Esempio n. 6
0
def create_pnl_file_and_delete_factor(factor_root_path, sector_name_list):
    """Write a single-factor PnL CSV per factor and delete factors that are mostly flat.

    For every sector in ``sector_name_list`` and every file under
    ``<factor_root_path>/<sector_name>`` whose name contains ``'pkl'``, the
    factor is loaded, aligned to the sector universe, converted to positions
    (``hold_time=5``, ``lag=2``, long/short) and multiplied with the returns
    to get a daily PnL.  If fewer than 30% of the PnL rows are non-zero the
    factor file is **removed from disk**; otherwise the PnL is written to
    ``<root_path>/data/single_factor_pnl/<sector_name>/<factor>.csv``.

    Args:
        factor_root_path: directory containing one sub-directory per sector.
        sector_name_list: iterable of sector names to process.

    Note:
        ``root_path`` is read from the enclosing scope.  The output name is
        ``factor_name[:-4]``, i.e. it assumes a four-character suffix such as
        ``.pkl``.
    """
    begin_date = pd.to_datetime('20100101')
    end_date = pd.to_datetime('20180401')

    index_name = '000016'
    # The raw return table does not depend on the sector; it is read on the
    # first iteration only and re-aligned per sector afterwards.
    raw_return_df = None
    for sector_name in sector_name_list:
        # sector
        sector_df = load_sector_data(begin_date, end_date, sector_name)

        xnms = sector_df.columns
        xinx = sector_df.index

        # suspend or limit up_dn
        suspendday_df, limit_buy_sell_df = load_locked_data_both(xnms, xinx)
        # return
        if raw_return_df is None:
            raw_return_df = pd.read_table(
                '/mnt/mfs/DAT_EQT/EM_Funda/DERIVED_14/aadj_r.csv',
                sep='|',
                index_col=0).astype(float)
            raw_return_df.index = pd.to_datetime(raw_return_df.index)
        return_choose = raw_return_df.reindex(columns=xnms,
                                              index=xinx,
                                              fill_value=0)

        # index data
        index_df = load_index_data(xinx, index_name)

        factor_path = factor_root_path + '/' + sector_name
        factor_name_list = [x for x in os.listdir(factor_path) if 'pkl' in x]
        save_pnl_path = os.path.join(root_path,
                                     'data/single_factor_pnl/' + sector_name)
        for factor_name in factor_name_list:
            factor_load_path = os.path.join(factor_path, factor_name)
            print(factor_load_path)

            # NOTE: unpickling can execute arbitrary code; only point this at
            # trusted factor directories.
            factor_df = pd.read_pickle(factor_load_path)
            factor_df = factor_df.reindex(columns=xnms,
                                          index=xinx,
                                          fill_value=0)
            daily_pos = deal_mix_factor(factor_df,
                                        sector_df,
                                        suspendday_df,
                                        limit_buy_sell_df,
                                        hold_time=5,
                                        lag=2,
                                        if_only_long=False)

            pnl_df = (daily_pos * return_choose).sum(axis=1)

            bt.AZ_Path_create(save_pnl_path)
            pnl_df = pd.DataFrame(pnl_df, columns=[factor_name[:-4]])
            # Share of dates with a non-zero PnL.
            active_ratio = len(pnl_df.replace(0, np.nan).dropna()) / len(pnl_df)
            if active_ratio < 0.3:
                print(factor_name + ' is delete')
                os.remove(factor_load_path)
            else:
                pnl_df.to_csv(
                    os.path.join(save_pnl_path, factor_name[:-4] + '.csv'))
            print('pnl create!')