Example 1
def save_sector(only_old_data: bool):
    columns = [DATE, CODE, RET_1]
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC, GP_S,
        SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M, LIQ_RATIO,
        EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    columns.extend(rolling_columns)
    portfolio = Portfolio()
    # Minimum market capitalization: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]

    # Drop rows where KRX_SECTOR is missing
    portfolio.dropna(subset=[KRX_SECTOR], inplace=True)
    portfolio = portfolio.sort_values(by=[CODE, DATE]).reset_index(drop=True)

    # Encode sectors as integer labels
    label_encoder = LabelEncoder()
    labeled_sector = label_encoder.fit_transform(portfolio[KRX_SECTOR])
    krx_sectors = label_encoder.classes_
    # One-hot encode the integer labels
    one_hot_encoder = OneHotEncoder(sparse=False)
    one_hot_encoded_sector = one_hot_encoder.fit_transform(
        labeled_sector.reshape(len(labeled_sector), 1))
    # Attach the encoded columns to the existing data
    df_one_hot_encoded_sector = pd.DataFrame(
        one_hot_encoded_sector, columns=krx_sectors).reset_index(drop=True)
    portfolio[krx_sectors] = df_one_hot_encoded_sector
    krx_sectors = list(krx_sectors)
    save_data(only_old_data, portfolio, SECTOR, rolling_columns, krx_sectors)
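For reference, the LabelEncoder/OneHotEncoder pair used above produces the same columns that pandas' pd.get_dummies would; a minimal sketch on a toy sector column (not the real Portfolio data):

# Sketch: one-hot encoding a toy sector column; equivalent to the encoder pair above.
import pandas as pd

toy = pd.DataFrame({'code': ['A0001', 'A0002', 'A0003'],
                    'krx_sector': ['IT', 'Finance', 'IT']})
one_hot = pd.get_dummies(toy['krx_sector'], dtype=float)  # one 0/1 column per sector
toy = pd.concat([toy, one_hot], axis=1)
print(toy)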
Example 2
def plot_rank_ic(portfolio: Portfolio,
                 factor: str,
                 rolling: int = 6,
                 title: str = '') -> pd.DataFrame:
    portfolio = portfolio.periodic_rank(min_rank=1,
                                        max_rank=10000,
                                        factor=factor,
                                        drop_rank=False)
    factor_rank = "{factor}_rank".format(factor=factor)
    portfolio = portfolio.rename(index=str, columns={"rank": factor_rank})
    portfolio = portfolio.periodic_rank(min_rank=1,
                                        max_rank=10000,
                                        factor=RET_1,
                                        drop_rank=False)
    ret_1_rank = "ret_1_rank"
    portfolio = portfolio.rename(index=str, columns={"rank": ret_1_rank})
    # Spearman rank correlation between the factor rank and the return rank, per date
    rank_ic = portfolio.groupby(by=[DATE]).apply(lambda x: 1 - (6 * ((x[
        factor_rank] - x[ret_1_rank])**2).sum()) / (len(x) * (len(x)**2 - 1)))

    rank_ic = pd.DataFrame(rank_ic, columns=['rank_ic'])
    rolling_column_name = 'rolling_{}'.format(rolling)
    rank_ic[rolling_column_name] = rank_ic['rank_ic'].rolling(
        window=rolling).mean()
    rank_ic = rank_ic.dropna(subset=[rolling_column_name])

    rank_ic.plot()
    plt.title(title)
    plt.axhline(y=0, color='black')
    plt.ylabel('Rank IC')
    plt.xlabel('Date')
    plt.show()

    return rank_ic
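The groupby lambda above is the closed-form Spearman rank correlation, 1 - 6*Σd²/(n(n²-1)), computed per date between the factor rank and the return rank; on tie-free ranks it matches scipy.stats.spearmanr. A minimal check on toy ranks:

# Sketch: the closed-form rank IC above equals scipy's Spearman correlation when ranks have no ties.
import pandas as pd
from scipy.stats import spearmanr

toy = pd.DataFrame({'factor_rank': [1, 2, 3, 4, 5],
                    'ret_rank': [2, 1, 4, 3, 5]})
n = len(toy)
d_squared = ((toy['factor_rank'] - toy['ret_rank']) ** 2).sum()
closed_form = 1 - 6 * d_squared / (n * (n ** 2 - 1))
print(closed_form, spearmanr(toy['factor_rank'], toy['ret_rank'])[0])  # both 0.8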
Example 3
def save_bollinger(only_old_data: bool):
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC, GP_S,
        SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M, LIQ_RATIO,
        EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    portfolio = Portfolio()
    # Minimum market capitalization: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]

    # Bollinger
    portfolio = portfolio.sort_values(by=[CODE, DATE]).reset_index(drop=True)
    portfolio['mean'] = portfolio.groupby(CODE)[ENDP].rolling(
        20).mean().reset_index(drop=True)
    portfolio['std'] = portfolio.groupby(CODE)[ENDP].rolling(
        20).std().reset_index(drop=True)
    portfolio[BOLLINGER] = portfolio['mean'] - 2 * portfolio['std']
    bollingers = portfolio.loc[portfolio[ENDP] < portfolio[BOLLINGER],
                               [DATE, CODE]]

    save_data(only_old_data,
              portfolio,
              BOLLINGER,
              rolling_columns,
              filtering_dataframe=bollingers)
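BOLLINGER above is the lower Bollinger band (20-period rolling mean minus two rolling standard deviations), and the filter keeps the dates on which the close falls below it. A minimal sketch on a toy price series:

# Sketch: lower Bollinger band filter on a toy close-price series.
import numpy as np
import pandas as pd

close = pd.Series(100 + np.cumsum(np.random.randn(60)))
lower_band = close.rolling(20).mean() - 2 * close.rolling(20).std()
below_band = close[close < lower_band]  # periods where the close pierces the lower band
print(below_band)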
Example 4
def save_all(only_old_data: bool):
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC, GP_S,
        SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M, LIQ_RATIO,
        EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    portfolio = Portfolio()
    # Minimum market capitalization: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]

    save_data(only_old_data, portfolio, ALL, rolling_columns)
Example 5
    def test_show_plot(self):
        pf = Portfolio()
        pf = pf.loc[pf[DATE] >= datetime(year=2011, month=5, day=31), :]

        pf.show_plot()

        pf.show_plot(cumulative=False, weighted=True, title='title', show_benchmark=False)
Example 6
def save_concepts(old_data: bool):
    log_mktcap = 'log_mktcap'
    portfolio = Portfolio()
    portfolio[log_mktcap] = np.log(portfolio[MKTCAP])

    value_factors = [E_P, B_P, S_P, C_P, DIVP]
    size_factors = [log_mktcap]
    momentum_factors = [MOM1, MOM12]
    quality_factors = [
        ROA, ROE, ROIC, S_A, DEBT_RATIO, EQUITY_RATIO, LIQ_RATIO
    ]
    volatility_factors = [VOL_1D]

    factor_groups = {}

    # Build every non-empty combination of the five factor groups; each loop either
    # includes a group or skips it, and the name prefixes are concatenated below.
    for value_factor, value_name in zip([value_factors, []], ['value_', '']):
        for size_factor, size_name in zip([size_factors, []], ['size_', '']):
            for momentum_factor, momentum_name in zip([momentum_factors, []],
                                                      ['momentum_', '']):
                for quality_factor, quality_name in zip([quality_factors, []],
                                                        ['quality_', '']):
                    for volatility_factor, volatility_name in zip(
                        [volatility_factors, []], ['volatility_', '']):
                        factor_group = []
                        factor_group.extend(value_factor)
                        factor_group.extend(size_factor)
                        factor_group.extend(momentum_factor)
                        factor_group.extend(quality_factor)
                        factor_group.extend(volatility_factor)
                        factor_names = []
                        factor_names.extend(value_name)
                        factor_names.extend(size_name)
                        factor_names.extend(momentum_name)
                        factor_names.extend(quality_name)
                        factor_names.extend(volatility_name)
                        factor_name = ''.join(factor_names)
                        if factor_name:
                            factor_groups[factor_name[:-1]] = factor_group

    with Pool(os.cpu_count()) as p:
        rs = [
            p.apply_async(save_data, [old_data, portfolio, key, value])
            for key, value in factor_groups.items()
        ]
        for r in rs:
            r.wait()
        p.close()
        p.join()
Example 7
def save_macro(only_old_data: bool):
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC, GP_S,
        SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M, LIQ_RATIO,
        EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO, TERM_SPREAD_KOR,
        TERM_SPREAD_US, CREDIT_SPREAD_KOR, LOG_USD2KRW, LOG_CHY2KRW,
        LOG_EURO2KRW, TED_SPREAD, LOG_NYSE, LOG_NASDAQ, LOG_OIL
    ]
    portfolio = Portfolio()
    # Minimum market capitalization: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]

    save_data(only_old_data, portfolio, MACRO, rolling_columns)
Example 8
def save_data(old_data: bool,
              portfolio: Portfolio,
              data_name: str,
              rolling_columns: list,
              dummy_columns: list = None,
              filtering_dataframe=None):
    print("Start saving {}...".format(data_name))

    portfolio = portfolio.sort_values(by=[CODE, DATE]).reset_index(drop=True)

    if old_data:
        # old data
        # Drop the last month, for which RET_1 is not yet available
        old_portfolio = portfolio.loc[~pd.isna(portfolio[RET_1]), :]
        old_set = get_data_set(old_portfolio, rolling_columns, dummy_columns)

        if isinstance(filtering_dataframe,
                      pd.DataFrame) and not filtering_dataframe.empty:
            filtering_dataframe = filtering_dataframe[[DATE, CODE]]
            old_set = pd.merge(old_set, filtering_dataframe, on=[DATE, CODE])

        old_set.reset_index(drop=True).to_dataframe().to_hdf(
            'data/{}.h5'.format(data_name), key='df', format='table', mode='w')
    else:
        # recent data
        recent_set = get_data_set(portfolio,
                                  rolling_columns,
                                  dummy_columns,
                                  return_y=False)
        # Use only the most recent month
        last_month = np.sort(recent_set[DATE].unique())[-1]
        recent_set = recent_set.loc[recent_set[DATE] == last_month, :]

        if isinstance(filtering_dataframe,
                      pd.DataFrame) and not filtering_dataframe.empty:
            filtering_dataframe = filtering_dataframe[[DATE, CODE]]
            recent_set = pd.merge(recent_set,
                                  filtering_dataframe,
                                  on=[DATE, CODE])

        recent_set.reset_index(drop=True).to_dataframe().to_hdf(
            'data/{}_recent.h5'.format(data_name),
            key='df',
            format='table',
            mode='w')
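The HDF5 files written by save_data can be read back with pandas; a minimal sketch, assuming a file such as data/all.h5 produced by the call in save_all:

# Sketch: reading back a data set written by save_data (file path assumed).
import pandas as pd

df = pd.read_hdf('data/all.h5', key='df')  # format='table' also supports where= queries
print(df.shape)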
Example 9
def save_filter(only_old_data: bool):
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC, GP_S,
        SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M, LIQ_RATIO,
        EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    portfolio = Portfolio()
    # Minimum market capitalization: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]

    # 2 < PER < 10.0 (http://pluspower.tistory.com/9)
    portfolio = portfolio.loc[(portfolio[PER] < 10) & (portfolio[PER] > 2)]
    # 0.2 < PBR < 1.0
    portfolio = portfolio.loc[(portfolio[PBR] < 1) & (portfolio[PBR] > 0.2)]
    # 2 < PCR < 8
    portfolio = portfolio.loc[(portfolio[PCR] < 8) & (portfolio[PCR] > 2)]
    # 0 < PSR < 0.8
    portfolio = portfolio.loc[portfolio[PSR] < 0.8]

    save_data(only_old_data, portfolio, FILTER, rolling_columns)
Example 10
import pandas as pd
import numpy as np
from ksif import Portfolio, columns
from ksif.core.columns import CODE, DATE, B_P, E_P, MOM12_1, GP_A, VOL_3M, RET_1
from tabulate import tabulate

INFORMATION_RATIO = 'information_ratio'
ALL = 'all'
VALUE_MOMENTUM = 'value+momentum'
VOLATILITY = 'volatility'
QUALITY = 'quality'
MOMENTUM = 'momentum'
VALUE = 'value'
std = 'std_'

pf = Portfolio()
universe = pf.loc[(pf[columns.MKTCAP] >= 50000000000) & (~np.isnan(pf[RET_1]))
                  & (~np.isnan(pf[B_P])) & (~np.isnan(pf[E_P])) &
                  (~np.isnan(pf[MOM12_1])) & (~np.isnan(pf[GP_A])) &
                  (~np.isnan(pf[VOL_3M])), :]
universe = universe.periodic_standardize(factor=B_P)
universe = universe.periodic_standardize(factor=E_P)
universe[VALUE] = (universe[std + B_P] + universe[std + E_P]) / 2
universe = universe.periodic_standardize(factor=MOM12_1)
universe[MOMENTUM] = universe[std + MOM12_1]
universe = universe.periodic_standardize(factor=GP_A)
universe[QUALITY] = universe[std + GP_A]
universe = universe.periodic_standardize(factor=VOL_3M)
universe[VOLATILITY] = universe[std + VOL_3M]

universe[VALUE_MOMENTUM] = universe[VALUE] + universe[MOMENTUM]
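periodic_standardize is a ksif Portfolio method; assuming it z-scores a factor cross-sectionally within each date and stores the result under a 'std_'-prefixed column (as the std + B_P usage suggests), an equivalent pandas sketch would be:

# Sketch (assumption): per-date cross-sectional z-score, as periodic_standardize appears to do.
import pandas as pd

toy = pd.DataFrame({'date': ['2018-01'] * 3 + ['2018-02'] * 3,
                    'b_p': [0.5, 1.0, 1.5, 0.2, 0.4, 0.9]})
toy['std_b_p'] = toy.groupby('date')['b_p'].transform(lambda x: (x - x.mean()) / x.std())
print(toy)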
Example 11
scaler = MinMaxScaler()

if __name__ == '__main__':
    columns = [
        DATE, CODE, RET_1, B_P, E_P, DIVP, S_P, C_P, ROE, ROA, ROIC, S_A,
        LIQ_RATIO, EQUITY_RATIO, ASSETSYOY, BETA_3M, MKTCAP, MOM1, MOM12,
        VOL_3M, TRADING_VOLUME_RATIO
    ]

    rolling_columns = [
        B_P, E_P, DIVP, S_P, C_P, ROE, ROA, ROIC, S_A, LIQ_RATIO, EQUITY_RATIO,
        ASSETSYOY, BETA_3M, MKTCAP, MOM1, MOM12, VOL_3M, TRADING_VOLUME_RATIO
    ]

    pf = Portfolio()
    pf = pf.loc[~pd.isna(pf[RET_1]), :]
    months = sorted(pf[DATE].unique())

    result_columns = [DATE, CODE, RET_1]
    rolled_columns = []
    all_set = pf.reset_index(drop=True)
    for column in rolling_columns:
        t_0 = column + '_t'
        t_1 = column + '_t-1'
        t_2 = column + '_t-2'
        t_3 = column + '_t-3'
        t_4 = column + '_t-4'
        t_5 = column + '_t-5'
        t_6 = column + '_t-6'
        t_7 = column + '_t-7'
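The snippet above is truncated, but the '_t', '_t-1', ..., '_t-7' column names suggest lagged copies of each rolling factor per company; a hedged sketch of how such lags are typically built with groupby and shift (toy column names, not the project's constants):

# Sketch (assumption): lagged feature columns per company via groupby/shift.
import pandas as pd

toy = pd.DataFrame({'code': ['A'] * 4 + ['B'] * 4,
                    'month': [1, 2, 3, 4] * 2,
                    'b_p': [1.0, 1.1, 1.2, 1.3, 2.0, 2.1, 2.2, 2.3]})
for lag in range(1, 4):
    toy['b_p_t-{}'.format(lag)] = toy.groupby('code')['b_p'].shift(lag)
print(toy)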
Example 12
def compare_ensemble(methods, models, quantiles, start_number: int = 0, end_number: int = 9, step: int = 1,
                     to_csv: bool = True, show_plot: bool = False):
    file_names = []
    CAGRs = []
    CAGR_rank_correlations = []
    CAGR_rank_p_values = []
    IRs = []
    IR_rank_correlations = []
    IR_rank_p_values = []
    SRs = []
    SR_rank_correlations = []
    SR_rank_p_values = []
    MDDs = []
    alphas = []
    alpha_rank_correlations = []
    alpha_rank_p_values = []
    betas = []
    rigid_accuracies = []
    decile_accuracies = []
    quarter_accuracies = []
    half_accuracies = []
    kospi_larges = []
    kospi_middles = []
    kospi_smalls = []
    kosdaq_larges = []
    kosdaq_middles = []
    kosdaq_smalls = []

    firms = Portfolio(include_holding=True, include_finance=True, include_managed=True, include_suspended=True).loc[:,
            [DATE, CODE, MKTCAP, EXCHANGE]]
    firms[DATE] = pd.to_datetime(firms[DATE])

    firms[RANK] = firms[[DATE, EXCHANGE, MKTCAP]].groupby([DATE, EXCHANGE]).rank(ascending=False)
    firms[KOSPI_LARGE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '유가증권시장') and (row[RANK] <= 100) else 0, axis=1)
    firms[KOSPI_MIDDLE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '유가증권시장') and (100 < row[RANK] <= 300) else 0, axis=1)
    firms[KOSPI_SMALL] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '유가증권시장') and (300 < row[RANK]) else 0, axis=1)
    firms[KOSDAQ_LARGE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '코스닥') and (row[RANK] <= 100) else 0, axis=1)
    firms[KOSDAQ_MIDDLE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '코스닥') and (100 < row[RANK] <= 300) else 0, axis=1)
    firms[KOSDAQ_SMALL] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '코스닥') and (300 < row[RANK]) else 0, axis=1)

    firms = firms.loc[
            :, [DATE, CODE, KOSPI_LARGE, KOSPI_MIDDLE, KOSPI_SMALL, KOSDAQ_LARGE, KOSDAQ_MIDDLE, KOSDAQ_SMALL]
            ]

    for method in methods:
        for quantile in quantiles:
            for model in tqdm(models):
                ensemble_summary, ensemble_portfolios = get_ensemble(
                    method, model_name=model, start_number=start_number, end_number=end_number, step=step,
                    quantile=quantile, show_plot=show_plot
                )

                if ensemble_summary is None and ensemble_portfolios is None:
                    continue

                ensemble_portfolio = pd.merge(ensemble_portfolios[-1], firms, on=[DATE, CODE])
                ensemble_portfolio_count = ensemble_portfolio[[DATE, CODE]].groupby(DATE).count()
                ensemble_portfolio_count.rename(columns={CODE: COUNT}, inplace=True)
                ensemble_portfolio_sum = ensemble_portfolio[[
                    DATE, KOSPI_LARGE, KOSPI_MIDDLE, KOSPI_SMALL, KOSDAQ_LARGE, KOSDAQ_MIDDLE, KOSDAQ_SMALL
                ]].groupby(DATE).sum()
                ensemble_portfolio_ratio = pd.merge(ensemble_portfolio_sum, ensemble_portfolio_count, on=DATE)
                ensemble_portfolio_ratio[KOSPI_LARGE] \
                    = ensemble_portfolio_ratio[KOSPI_LARGE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSPI_MIDDLE] \
                    = ensemble_portfolio_ratio[KOSPI_MIDDLE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSPI_SMALL] \
                    = ensemble_portfolio_ratio[KOSPI_SMALL] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSDAQ_LARGE] \
                    = ensemble_portfolio_ratio[KOSDAQ_LARGE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSDAQ_MIDDLE] \
                    = ensemble_portfolio_ratio[KOSDAQ_MIDDLE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSDAQ_SMALL] \
                    = ensemble_portfolio_ratio[KOSDAQ_SMALL] / ensemble_portfolio_ratio[COUNT]

                file_names.append(_get_file_name(method, model, quantile))

                CAGRs.append(ensemble_summary[CAGR].values[-1])
                CAGR_rankIC = spearmanr(ensemble_summary[CAGR].values, ensemble_summary[CAGR].index)
                CAGR_rank_correlations.append(CAGR_rankIC[0])
                CAGR_rank_p_values.append(CAGR_rankIC[1])

                IRs.append(ensemble_summary[IR].values[-1])
                IR_rankIC = spearmanr(ensemble_summary[IR].values, ensemble_summary[IR].index)
                IR_rank_correlations.append(IR_rankIC[0])
                IR_rank_p_values.append(IR_rankIC[1])

                SRs.append(ensemble_summary[SR].values[-1])
                SR_rankIC = spearmanr(ensemble_summary[SR].values, ensemble_summary[SR].index)
                SR_rank_correlations.append(SR_rankIC[0])
                SR_rank_p_values.append(SR_rankIC[1])

                MDDs.append(ensemble_summary[MDD].values[-1])

                alphas.append(ensemble_summary[FAMA_FRENCH_ALPHA].values[-1])
                alpha_rankIC = spearmanr(ensemble_summary[FAMA_FRENCH_ALPHA].values,
                                         ensemble_summary[FAMA_FRENCH_ALPHA].index)
                alpha_rank_correlations.append(alpha_rankIC[0])
                alpha_rank_p_values.append(alpha_rankIC[1])
                betas.append(ensemble_summary[FAMA_FRENCH_BETA].values[-1])

                rigid_accuracies.append(ensemble_summary[RIGID_ACCURACY].values[-1])
                decile_accuracies.append(ensemble_summary[DECILE_ACCURACY].values[-1])
                quarter_accuracies.append(ensemble_summary[QUARTER_ACCURACY].values[-1])
                half_accuracies.append(ensemble_summary[HALF_ACCURACY].values[-1])

                kospi_larges.append(ensemble_portfolio_ratio[KOSPI_LARGE].mean())
                kospi_middles.append(ensemble_portfolio_ratio[KOSPI_MIDDLE].mean())
                kospi_smalls.append(ensemble_portfolio_ratio[KOSPI_SMALL].mean())
                kosdaq_larges.append(ensemble_portfolio_ratio[KOSDAQ_LARGE].mean())
                kosdaq_middles.append(ensemble_portfolio_ratio[KOSDAQ_MIDDLE].mean())
                kosdaq_smalls.append(ensemble_portfolio_ratio[KOSDAQ_SMALL].mean())

    comparison_result = pd.DataFrame(data={
        'Model': file_names,
        'CAGR': CAGRs,
        'CAGR RC': CAGR_rank_correlations,
        'CAGR RC p-value': CAGR_rank_p_values,
        'IR': IRs,
        'IR RC': IR_rank_correlations,
        'IR RC p-value': IR_rank_p_values,
        'SR': SRs,
        'SR RC': SR_rank_correlations,
        'SR RC p-value': SR_rank_p_values,
        'FF alpha': alphas,
        'FF alpha RC': alpha_rank_correlations,
        'FF alpha RC p-value': alpha_rank_p_values,
        'FF beta': betas,
        'MDD': MDDs,
        'Rigid accuracy': rigid_accuracies,
        'Decile accuracy': decile_accuracies,
        'Quarter accuracy': quarter_accuracies,
        'Half accuracy': half_accuracies,
        'KOSPI Large': kospi_larges,
        'KOSPI Middle': kospi_middles,
        'KOSPI Small': kospi_smalls,
        'KOSDAQ Large': kosdaq_larges,
        'KOSDAQ Middle': kosdaq_middles,
        'KOSDAQ Small': kosdaq_smalls,
    })

    if to_csv:
        comparison_result.to_csv('summary/comparison_result.csv', index=False)

    return comparison_result
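The six row-wise apply calls above can be replaced by vectorized boolean masks; a sketch of the same KOSPI/KOSDAQ size-bucket flags on toy data (lower-case column names are illustrative, not the project's constants):

# Sketch: vectorized size-bucket flags instead of per-row apply (toy data).
import pandas as pd

firms = pd.DataFrame({'exchange': ['유가증권시장', '유가증권시장', '코스닥'],
                      'rank': [50, 150, 350]})
is_kospi = firms['exchange'] == '유가증권시장'
firms['kospi_large'] = (is_kospi & (firms['rank'] <= 100)).astype(int)
firms['kospi_middle'] = (is_kospi & (firms['rank'] > 100) & (firms['rank'] <= 300)).astype(int)
firms['kospi_small'] = (is_kospi & (firms['rank'] > 300)).astype(int)
print(firms)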
Example 13
def get_ensemble(method: str, model_name: str, start_number: int = 0, end_number: int = 9, step: int = 1,
                 quantile: int = 40, show_plot=True):
    """

    :param method: (str)
    :param model_name: (str)
    :param start_number: (int)
    :param end_number: (int)
    :param step: (int)
    :param quantile: (int)
    :param show_plot: (bool)

    :return ensemble_summary: (DataFrame)
        PORTFOLIO_RETURN    | (float)
        ACTIVE_RETURN       | (float)
        ACTIVE_RISK         | (float)
        IR                  | (float)
        CAGR                | (float)
        RIGID_ACCURACY      | (float)
        DECILE_ACCURACY     | (float)
        QUARTER_ACCURACY    | (float)
        HALF_ACCURACY       | (float)
    :return ensemble_portfolios: ([Portfolio])
        DATE                | (datetime)
        CODE                | (str)
        RET_1               | (float)
    """
    # Check parameters
    assert method in METHODS, "method does not exist."
    assert end_number > start_number + 1, "end_number should be bigger than (start_number + 1)."
    assert step >= 1, "step should be a positive integer."
    assert quantile > 1, "quantile should be an integer bigger than 1."

    result_file_name = _get_file_name(method, model_name, quantile)

    predictions = _get_predictions(model_name, start_number, end_number)

    get_ensemble_predictions = GET_ENSEMBLE_PREDICTIONS[method]

    ensemble_predictions = get_ensemble_predictions(predictions, quantile)

    # Append actual returns
    ensemble_predictions = [pd.merge(ensemble_prediction, actual_returns, on=[DATE, CODE]) for
                            ensemble_prediction in ensemble_predictions]

    # Cumulative ensemble
    ensemble_numbers = pd.DataFrame(index=ensemble_predictions[0][DATE].unique())
    ensemble_cumulative_returns = pd.DataFrame(index=ensemble_predictions[0][DATE].unique())
    for index, ensemble_prediction in enumerate(ensemble_predictions):
        ensemble_number = ensemble_prediction.groupby(by=[DATE])[CODE].count()
        ensemble_return = ensemble_prediction.groupby(by=[DATE])[RET_1].mean()
        ensemble_cumulative_return = _cumulate(ensemble_return)

        if (index + 1) % step == 0:
            ensemble_numbers[index + 1] = ensemble_number
            ensemble_cumulative_returns[index + 1] = ensemble_cumulative_return

    # Fill nan
    ensemble_numbers.fillna(0, inplace=True)
    ensemble_cumulative_returns.fillna(method='ffill', inplace=True)
    ensemble_cumulative_returns.fillna(0, inplace=True)

    ensemble_portfolios = [Portfolio(ensemble_prediction) for ensemble_prediction in
                           ensemble_predictions[(step - 1)::step]]

    for ensemble_portfolio in ensemble_portfolios:
        if ensemble_portfolio.empty:
            return None, None

    ensemble_outcomes = [ensemble_portfolio.outcome() for ensemble_portfolio in ensemble_portfolios]
    portfolio_returns = [ensemble_outcome[PORTFOLIO_RETURN] for ensemble_outcome in ensemble_outcomes]
    active_returns = [ensemble_outcome[ACTIVE_RETURN] for ensemble_outcome in ensemble_outcomes]
    active_risks = [ensemble_outcome[ACTIVE_RISK] for ensemble_outcome in ensemble_outcomes]
    information_ratios = [ensemble_outcome[IR] for ensemble_outcome in ensemble_outcomes]
    sharpe_ratios = [ensemble_outcome[SR] for ensemble_outcome in ensemble_outcomes]
    MDDs = [ensemble_outcome[MDD] for ensemble_outcome in ensemble_outcomes]
    alphas = [ensemble_outcome[FAMA_FRENCH_ALPHA] for ensemble_outcome in ensemble_outcomes]
    betas = [ensemble_outcome[FAMA_FRENCH_BETA] for ensemble_outcome in ensemble_outcomes]
    CAGRs = [ensemble_outcome[CAGR] for ensemble_outcome in ensemble_outcomes]
    rigid_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, quantile) for
                        ensemble_portfolio in ensemble_portfolios]
    decile_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, 10) for
                         ensemble_portfolio in ensemble_portfolios]
    quarter_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, 4) for
                          ensemble_portfolio in ensemble_portfolios]
    half_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, 2) for
                       ensemble_portfolio in ensemble_portfolios]

    ensemble_summary = pd.DataFrame({
        PORTFOLIO_RETURN: portfolio_returns,
        ACTIVE_RETURN: active_returns,
        ACTIVE_RISK: active_risks,
        IR: information_ratios,
        SR: sharpe_ratios,
        MDD: MDDs,
        FAMA_FRENCH_ALPHA: alphas,
        FAMA_FRENCH_BETA: betas,
        CAGR: CAGRs,
        RIGID_ACCURACY: rigid_accuracies,
        DECILE_ACCURACY: decile_accuracies,
        QUARTER_ACCURACY: quarter_accuracies,
        HALF_ACCURACY: half_accuracies,
    }, index=ensemble_numbers.columns)
    ensemble_summary.to_csv('summary/' + result_file_name + '.csv')
    for ensemble_prediction in ensemble_predictions:
        ensemble_prediction[DATE] = pd.to_datetime(ensemble_prediction[DATE], format='%Y-%m-%d')

    # Plot
    if show_plot:
        fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 8))

        # Company number
        ensemble_numbers.plot(ax=axes[0], colormap='Blues')
        axes[0].set_title('{}:{}, Top {}-quantile'.format(method.title(), model_name, quantile))
        axes[0].set_xlabel('Date')
        axes[0].set_ylabel('# of companies')
        axes[0].legend(loc='upper left')

        # Cumulative return
        ensemble_cumulative_returns.plot(ax=axes[1], colormap='Blues')
        axes[1].set_xlabel('Date')
        axes[1].set_ylabel('Return')
        axes[1].legend(loc='upper left')

        # Information ratio
        # ensembles = ensemble_cumulative_returns.columns
        # trend_model = np.polyfit(ensembles, information_ratios, 1)
        # get_trend = np.poly1d(trend_model)
        # axes[2].plot(ensembles, information_ratios, 'black', ensembles, get_trend(ensembles), 'r--')
        # axes[2].set_ylim(0.3, 0.5)
        # axes[2].set_xlabel('# of ensembles')
        # axes[2].set_ylabel('Information ratio')

        plt.savefig('summary/' + result_file_name + '.png')
        fig.show()

    return ensemble_summary, ensemble_portfolios
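_cumulate is a helper that is not shown in this excerpt; assuming it compounds the monthly mean returns into a cumulative return series, a minimal sketch:

# Sketch (assumption): compounding periodic returns into a cumulative return series.
import pandas as pd

monthly_returns = pd.Series([0.02, -0.01, 0.03])
cumulative = (1 + monthly_returns).cumprod() - 1
print(cumulative)  # approximately 0.0200, 0.0098, 0.0401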
Example 14
GEOMETRIC = 'geometric'

QUANTILE = 'quantile'
PREDICTED_RET_1 = 'predict_return_1'
ACTUAL_RANK = 'actual_rank'
PREDICTED_RANK = 'predicted_rank'
COUNT = 'count'

RANK = 'rank'
CORRECT = 'correct'
RIGID_ACCURACY = 'rigid_accuracy'
DECILE_ACCURACY = 'decile_accuracy'
QUARTER_ACCURACY = 'quarter_accuracy'
HALF_ACCURACY = 'half_accuracy'

pf = Portfolio()
CD91_returns = pf.get_benchmark(CD91)[BENCHMARK_RET_1]
CD91_returns = CD91_returns.dropna()

actual_returns = pf[[DATE, CODE, RET_1]]


def get_intersection_ensemble_predictions(predictions, quantile: int = 40):
    """
    :return ensemble_predictions:
        DATE        | (datetime64)
        CODE        | (str)
    """
    selected_predictions = _select_predictions(predictions, quantile, [DATE, CODE])

    # Intersection
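The excerpt is cut off after the intersection comment; a hedged sketch of one way an intersection ensemble can be formed, keeping only the (DATE, CODE) pairs selected by every model (toy frames, not the library's actual implementation):

# Sketch (assumption): intersect the (date, code) picks of several models via inner merges.
from functools import reduce
import pandas as pd

picks_a = pd.DataFrame({'date': ['2018-01'] * 3, 'code': ['A', 'B', 'C']})
picks_b = pd.DataFrame({'date': ['2018-01'] * 3, 'code': ['B', 'C', 'D']})
picks_c = pd.DataFrame({'date': ['2018-01'] * 2, 'code': ['C', 'B']})
intersection = reduce(lambda left, right: pd.merge(left, right, on=['date', 'code']),
                      [picks_a, picks_b, picks_c])
print(intersection)  # only B and C survive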
Example 15
STD_MOMENTUM = std + MOMENTUM
STD_QUALITY = std + QUALITY
STD_VOLATILITY = std + VOLATILITY

ROLLING_RET_1 = rolling + RET_1
ROLLING_VALUE = rolling + STD_VALUE
ROLLING_MOMENTUM = rolling + STD_MOMENTUM
ROLLING_QUALITY = rolling + STD_QUALITY
ROLLING_VOLATILITY = rolling + STD_VOLATILITY

PRED_VALUE = predicted + VALUE
PRED_MOMENTUM = predicted + MOMENTUM
PRED_QUALITY = predicted + QUALITY
PRED_VOLATILITY = predicted + VOLATILITY

pf = Portfolio()

pf[TRADING_CAPITAL] = pf[TRADING_VOLUME_RATIO] * pf[MKTCAP]

pf = pf.periodic_standardize(factor=RET_1)

pf = pf.periodic_standardize(factor=B_P)
pf = pf.periodic_standardize(factor=E_P)
pf[STD_VALUE] = (pf[std + B_P] + pf[std + E_P]) / 2

pf = pf.periodic_standardize(factor=MOM12_1)
pf[STD_MOMENTUM] = pf[std + MOM12_1]

pf = pf.periodic_standardize(factor=GP_A)
pf[STD_QUALITY] = pf[std + GP_A]