Example #1
0
def sample_1052():
    """
    10.5.2 Feature-based trade prediction.

    Builds the multi-feature filter and the angle-feature filter from the
    cached back-test orders, then for each selected classifier prints a
    banner and runs ``cross_val_accuracy_score()``. Finishes by calling
    ``deg.train_test_split_xy()``.

    :return: None
    """

    def switch_and_score(banner, fiter, use_classifier):
        # Announce first, then swap the estimator, then cross-validate
        print(banner)
        use_classifier()
        fiter.cross_val_accuracy_score()

    # Must be run while a cached back-test result is available
    cached_result, _ = sample_1051_1(from_cache=True, show=False)

    from abupy import AbuUmpMainMul
    mul = AbuUmpMainMul.UmpMulFiter(orders_pd=cached_result.orders_pd,
                                    scaler=False)
    print('mul.df.head():\n', mul.df.head())

    # Replace the current classifier with a decision tree
    switch_and_score('decision_tree_classifier cv please wait...', mul,
                     lambda: mul.estimator.decision_tree_classifier())

    # Replace the current classifier with knn
    switch_and_score('knn_classifier cv please wait...', mul,
                     lambda: mul.estimator.knn_classifier())

    from abupy import AbuUmpMainDeg
    deg = AbuUmpMainDeg.UmpDegFiter(orders_pd=cached_result.orders_pd)
    print('deg.df.head():\n', deg.df.head())

    # Classifier selected through estimator.xgb_classifier()
    switch_and_score('xgb_classifier cv please wait...', deg,
                     lambda: deg.estimator.xgb_classifier())

    # Classifier selected through estimator.adaboost_classifier()
    switch_and_score(
        'adaboost_classifier cv please wait...', deg,
        lambda: deg.estimator.adaboost_classifier(base_estimator=None))

    print('train_test_split_xy please wait...')
    deg.train_test_split_xy()
Example #2
0
def sample_ump():
    """
    Let the four main umpires judge the test-set orders and print how often
    their blocks were correct.

    Steps:
      1. Load the train/test back-test results and keep only the test orders
         whose ``result`` is non-zero.
      2. For every order, call each umpire's ``predict_kwargs`` with the
         order's ``ml_features`` and store the verdict in the columns
         ``ump_deg``, ``ump_jump``, ``ump_wave`` and ``ump_price``.
      3. Keep the orders with at least one positive verdict and print the
         share of them whose ``result`` is -1, followed by the tail of that
         table.
      4. Print, per umpire, the accuracy of its blocks (a block counts as
         correct when the order's ``result`` is -1).

    :return: None
    """
    ump_deg = AbuUmpMainDeg(predict=True)
    ump_jump = AbuUmpMainJump(predict=True)
    ump_price = AbuUmpMainPrice(predict=True)
    ump_wave = AbuUmpMainWave(predict=True)

    # (verdict column, umpire) in the order the columns are added to the table
    verdict_columns = (
        ('ump_deg', ump_deg),      # angle umpire
        ('ump_jump', ump_jump),    # jump (gap) umpire
        ('ump_wave', ump_wave),    # wave umpire
        ('ump_price', ump_price),  # price umpire
    )

    def apply_ml_features_ump(order, predicter, need_hit_cnt):
        """Return predicter's verdict for one order row (used with DataFrame.apply)."""
        if not isinstance(order.ml_features, dict):
            # With old pandas versions the dict comes back as its str repr
            ml_features = ast.literal_eval(order.ml_features)
        else:
            ml_features = order.ml_features

        return predicter.predict_kwargs(need_hit_cnt=need_hit_cnt,
                                        **ml_features)

    _, abu_result_tuple_test, _, _ = load_abu_result_tuple()

    # Keep only orders that have a trade result. The explicit copy makes
    # this an independent frame, so adding columns below does not write into
    # a slice of orders_pd (avoids pandas chained-assignment warnings).
    orders_pd = abu_result_tuple_test.orders_pd
    order_has_result = orders_pd[orders_pd.result != 0].copy()

    # Each umpire gives its verdict for every order (need_hit_cnt=2)
    for column, umpire in verdict_columns:
        order_has_result[column] = order_has_result.apply(
            apply_ml_features_ump, axis=1, args=(umpire, 2))

    # '^ump_' selects the verdict columns; the previous pattern '^ump_*'
    # made the underscore optional and matched anything starting with 'ump'.
    block_pd = order_has_result.filter(regex='^ump_').copy()
    block_pd['sum_bk'] = block_pd.sum(axis=1)
    block_pd['result'] = order_has_result['result']

    # Orders flagged by at least one umpire
    block_pd = block_pd[block_pd.sum_bk > 0]
    print('四个裁判整体拦截正确率{:.2f}%'.format(
        block_pd[block_pd.result == -1].result.count() /
        block_pd.result.count() * 100))
    print('block_pd.tail():\n', block_pd.tail())

    def sub_ump_show(block_name):
        """Return the accuracy of the blocks issued by the umpire column block_name."""
        sub_block_pd = block_pd[block_pd[block_name] == 1]
        # A block is correct when the trade failed: -1 -> 1, 1 -> 0.
        # Computed as a separate array instead of overwriting a column of
        # the filtered slice.
        block_was_right = np.where(sub_block_pd.result == -1, 1, 0)
        return accuracy_score(sub_block_pd[block_name], block_was_right)

    print('角度裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_deg') * 100))
    print('跳空裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_jump') * 100))
    print('波动裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_wave') * 100))
    print('价格裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_price') * 100))
Example #3
0
def sample_112():
    """
    11.2.1 Angle main umpire; 11.2.2 filtering the cluster set with the
    global optimum.

    Fits ``AbuUmpMainDeg`` on the training orders, reports the cluster with
    the highest ``lrs`` value, compares the angle features of that cluster
    with the whole training set, then selects the clusters that satisfy the
    ``brust_min()`` thresholds and dumps them with ``dump_clf``.

    :return: None
    """
    deg_columns = ('buy_deg_ang21', 'buy_deg_ang42', 'buy_deg_ang60',
                   'buy_deg_ang252')

    def print_deg_means(title, orders):
        # Same report order as before: 60, 21, 42, 252
        for days in (60, 21, 42, 252):
            print('{0}deg_ang{1}平均值为{2:.2f}'.format(
                title, days, orders['buy_deg_ang{0}'.format(days)].mean()))

    abu_result_tuple_train, _, _, _ = load_abu_result_tuple()
    orders_pd_train = abu_result_tuple_train.orders_pd
    # The umpire is built from orders_pd
    ump_deg = AbuUmpMainDeg(orders_pd_train)
    print('ump_deg.fiter.df.head():\n', ump_deg.fiter.df.head())

    # Slow step: roughly ten-odd minutes depending on the machine / cpu
    ump_deg.fit(brust_min=False, show=False)
    print('ump_deg.cprs:\n', ump_deg.cprs)

    # idxmax() returns the index LABEL of the largest lrs value. The label
    # is needed for .loc, for nts[...] and for .split('_'); Series.argmax()
    # returns an integer position since pandas 1.0, so it cannot be used.
    max_failed_label = ump_deg.cprs.lrs.idxmax()
    max_failed_cluster = ump_deg.cprs.loc[max_failed_label]
    print('失败概率最大的分类簇{0}, 失败率为{1:.2f}%, 簇交易总数{2}, 簇平均交易获利{3:.2f}%'.format(
        max_failed_label, max_failed_cluster.lrs * 100,
        max_failed_cluster.lcs, max_failed_cluster.lms * 100))

    # The part of the label before the first '_' is used as the key into rts
    cpt = int(max_failed_label.split('_')[0])
    print('cpt:\n', cpt)
    ump_deg.show_parse_rt(ump_deg.rts[cpt])

    max_failed_cluster_orders = ump_deg.nts[max_failed_label]

    print('max_failed_cluster_orders:\n', max_failed_cluster_orders)

    abupy.ml.show_orders_hist(max_failed_cluster_orders, list(deg_columns))
    print_deg_means('分类簇中', max_failed_cluster_orders)

    abupy.ml.show_orders_hist(orders_pd_train, list(deg_columns))
    print_deg_means('训练数据集中', orders_pd_train)

    # 11.2.2 Filter the cluster set with the global optimum
    brust_min = ump_deg.brust_min()
    print('brust_min:', brust_min)

    # Keep clusters with lps and lms at or below, and lrs at or above, the
    # corresponding brust_min thresholds
    llps = ump_deg.cprs[(ump_deg.cprs['lps'] <= brust_min[0])
                        & (ump_deg.cprs['lms'] <= brust_min[1])
                        & (ump_deg.cprs['lrs'] >= brust_min[2])]
    print('llps:\n', llps)

    print(ump_deg.choose_cprs_component(llps))
    ump_deg.dump_clf(llps)
Example #4
0
File: c11.py Project: 3774257/abu
def sample_112():
    """
    11.2.1 Angle main umpire; 11.2.2 filtering the cluster set with the
    global optimum.

    Fits ``AbuUmpMainDeg`` on the training orders, reports the cluster with
    the highest ``lrs`` value, compares the angle features of that cluster
    with the whole training set, then selects the clusters that satisfy the
    ``brust_min()`` thresholds and dumps them with ``dump_clf``.

    :return: None
    """
    deg_columns = ('buy_deg_ang21', 'buy_deg_ang42', 'buy_deg_ang60',
                   'buy_deg_ang252')

    def print_deg_means(title, orders):
        # Same report order as before: 60, 21, 42, 252
        for days in (60, 21, 42, 252):
            print('{0}deg_ang{1}平均值为{2:.2f}'.format(
                title, days, orders['buy_deg_ang{0}'.format(days)].mean()))

    abu_result_tuple_train, _, _, _ = load_abu_result_tuple()
    orders_pd_train = abu_result_tuple_train.orders_pd
    # The umpire is built from orders_pd
    ump_deg = AbuUmpMainDeg(orders_pd_train)
    print('ump_deg.fiter.df.head():\n', ump_deg.fiter.df.head())

    # Slow step: roughly ten-odd minutes depending on the machine / cpu
    ump_deg.fit(brust_min=False)
    print('ump_deg.cprs:\n', ump_deg.cprs)

    # idxmax() returns the index LABEL of the largest lrs value. The label
    # is needed for .loc, for nts[...] and for .split('_'); Series.argmax()
    # returns an integer position since pandas 1.0, so it cannot be used.
    max_failed_label = ump_deg.cprs.lrs.idxmax()
    max_failed_cluster = ump_deg.cprs.loc[max_failed_label]
    print('失败概率最大的分类簇{0}, 失败率为{1:.2f}%, 簇交易总数{2}, 簇平均交易获利{3:.2f}%'.format(
        max_failed_label, max_failed_cluster.lrs * 100,
        max_failed_cluster.lcs, max_failed_cluster.lms * 100))

    # The part of the label before the first '_' is used as the key into rts
    cpt = int(max_failed_label.split('_')[0])
    print('cpt:\n', cpt)
    ump_deg.show_parse_rt(ump_deg.rts[cpt])

    max_failed_cluster_orders = ump_deg.nts[max_failed_label]

    print('max_failed_cluster_orders:\n', max_failed_cluster_orders)

    ml.show_orders_hist(max_failed_cluster_orders, list(deg_columns))
    print_deg_means('分类簇中', max_failed_cluster_orders)

    ml.show_orders_hist(orders_pd_train, list(deg_columns))
    print_deg_means('训练数据集中', orders_pd_train)

    # 11.2.2 Filter the cluster set with the global optimum
    brust_min = ump_deg.brust_min()
    print('brust_min:', brust_min)

    # Keep clusters with lps and lms at or below, and lrs at or above, the
    # corresponding brust_min thresholds
    llps = ump_deg.cprs[(ump_deg.cprs['lps'] <= brust_min[0])
                        & (ump_deg.cprs['lms'] <= brust_min[1])
                        & (ump_deg.cprs['lrs'] >= brust_min[2])]
    print('llps:\n', llps)

    print(ump_deg.choose_cprs_component(llps))
    ump_deg.dump_clf(llps)
Example #5
0
# Notebook-style walkthrough: build feature filters from the back-test orders
# and cross-validate different classifiers on them.
# NOTE(review): abu_result_tuple is not defined in this snippet; it is
# expected to exist already (e.g. from an earlier notebook cell).
from abupy import AbuUmpMainMul

mul = AbuUmpMainMul.UmpMulFiter(orders_pd=abu_result_tuple.orders_pd,
                                scaler=False)

# Peek at the generated feature table (only displayed in an interactive session)
mul.df.head()

# Cross-validation accuracy with the classifier currently set
mul().cross_val_accuracy_score()

# Random forest
mul().estimator.random_forest_classifier()
mul().cross_val_accuracy_score()

# Experiment with the historical fitted-angle features
from abupy import AbuUmpMainDeg
# The nested filter class is spelled "Fiter", matching UmpMulFiter above;
# "UmpDegFilter" is not the name used elsewhere for this class.
deg = AbuUmpMainDeg.UmpDegFiter(orders_pd=abu_result_tuple.orders_pd)
# Use adaboost as the classifier
deg().estimator.adaboost_classifier()
deg.df.head()

deg().cross_val_accuracy_score()

# Confusion-matrix distribution
deg().train_test_split_xy()

# Use more features
from abupy import AbuUmpMainFull
# NOTE(review): spelled "Fiter" for consistency with the other nested filter
# classes — confirm against the installed abupy version.
full = AbuUmpMainFull.UmpFullFiter(orders_pd=abu_result_tuple.orders_pd)
# Keep using adaboost
full().estimator.adaboost_classifier()
#查看full所有特征名称