def sample_1052():
    """
    10.5.2 Feature-based trade prediction.

    Loads a cached backtest result, builds the multi-feature and angle-feature
    frames from its orders, and runs cross-validation with several classifiers,
    printing progress along the way.

    :return: None
    """

    def _cross_validate(fiter, banner, choose_estimator):
        # Announce the step, switch the estimator, then score it.
        print(banner)
        choose_estimator(fiter.estimator)
        fiter.cross_val_accuracy_score()

    # Must be run with a cache already available (from_cache=True).
    abu_result_tuple, _ = sample_1051_1(from_cache=True, show=False)
    orders = abu_result_tuple.orders_pd

    from abupy import AbuUmpMainMul
    mul = AbuUmpMainMul.UmpMulFiter(orders_pd=orders, scaler=False)
    print('mul.df.head():\n', mul.df.head())

    # Original note: svm is the default classifier; here a decision tree is
    # selected explicitly before cross-validating.
    _cross_validate(mul, 'decision_tree_classifier cv please wait...',
                    lambda est: est.decision_tree_classifier())

    # Switch the classifier to knn.
    _cross_validate(mul, 'knn_classifier cv please wait...',
                    lambda est: est.knn_classifier())

    from abupy import AbuUmpMainDeg
    deg = AbuUmpMainDeg.UmpDegFiter(orders_pd=orders)
    print('deg.df.head():\n', deg.df.head())

    # Original note says GradientBoosting; the call made is xgb_classifier().
    _cross_validate(deg, 'xgb_classifier cv please wait...',
                    lambda est: est.xgb_classifier())

    # Classifier: adaboost.
    _cross_validate(deg, 'adaboost_classifier cv please wait...',
                    lambda est: est.adaboost_classifier(base_estimator=None))

    print('train_test_split_xy please wait...')
    deg.train_test_split_xy()
def sample_ump():
    """
    Let the four main umpires (angle, jump, wave, price) judge every test-set
    order that has a result, then print how often their interceptions were
    correct, overall and per umpire.

    An interception is counted as correct when the intercepted order was a
    losing trade (``result == -1``).

    :return: None
    """
    ump_deg = AbuUmpMainDeg(predict=True)
    ump_jump = AbuUmpMainJump(predict=True)
    ump_price = AbuUmpMainPrice(predict=True)
    ump_wave = AbuUmpMainWave(predict=True)

    def apply_ml_features_ump(order, predicter, need_hit_cnt):
        """Feed one order's ml_features to ``predicter`` and return its verdict."""
        ml_features = order.ml_features
        if not isinstance(ml_features, dict):
            # With older pandas versions the stored dict comes back as a str.
            ml_features = ast.literal_eval(ml_features)
        return predicter.predict_kwargs(need_hit_cnt=need_hit_cnt, **ml_features)

    _, abu_result_tuple_test, _, _ = load_abu_result_tuple()
    orders_pd = abu_result_tuple_test.orders_pd

    # Keep only orders that have a trade result. The explicit copy makes the
    # column assignments below write to an independent frame instead of a
    # slice of orders_pd (no SettingWithCopy ambiguity).
    order_has_result = orders_pd[orders_pd.result != 0].copy()

    # Verdict columns, in the order they are added to the frame:
    # angle, jump, wave, price.
    judges = (
        ('ump_deg', ump_deg),
        ('ump_jump', ump_jump),
        ('ump_wave', ump_wave),
        ('ump_price', ump_price),
    )
    for column, umpire in judges:
        order_has_result[column] = order_has_result.apply(
            apply_ml_features_ump, axis=1, args=(umpire, 2))

    # Select exactly the 'ump_' verdict columns; as a regex, 'ump_*' would
    # make the underscore optional and match any name starting with 'ump'.
    block_pd = order_has_result.filter(regex='^ump_').copy()
    block_pd['sum_bk'] = block_pd.sum(axis=1)
    block_pd['result'] = order_has_result['result']
    # Keep only orders for which the verdict columns sum to more than zero
    # (presumably a verdict of 1 means "intercept" -- confirm against
    # predict_kwargs).
    block_pd = block_pd[block_pd.sum_bk > 0]

    print('四个裁判整体拦截正确率{:.2f}%'.format(
        block_pd[block_pd.result == -1].result.count() /
        block_pd.result.count() * 100))
    print('block_pd.tail():\n', block_pd.tail())

    def sub_ump_show(block_name):
        """Return the interception accuracy of the umpire column ``block_name``."""
        sub_block_pd = block_pd[block_pd[block_name] == 1]
        # A failed trade means the interception was right: -1 -> 1, else -> 0.
        # Computed as a local array rather than assigned back onto the slice.
        # noinspection PyTypeChecker
        hits = np.where(sub_block_pd.result == -1, 1, 0)
        return accuracy_score(sub_block_pd[block_name], hits)

    print('角度裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_deg') * 100))
    print('跳空裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_jump') * 100))
    print('波动裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_wave') * 100))
    print('价格裁判拦截正确率{:.2f}%'.format(sub_ump_show('ump_price') * 100))
def sample_112():
    """
    11.2.1 Angle main umpire, 11.2.2 filtering the cluster set with the
    global optimum.

    Fits the angle umpire on the training orders, reports the cluster with the
    highest failure rate, compares its angle features with the whole training
    set, then selects clusters with the ``brust_min`` thresholds and dumps
    them.

    :return: None
    """
    # NOTE(review): a second `sample_112` definition is visible further down;
    # if both live in the same module, the later one replaces this one.
    deg_columns = ['buy_deg_ang21', 'buy_deg_ang42', 'buy_deg_ang60',
                   'buy_deg_ang252']

    def _show_deg_stats(orders, title):
        # Histogram of the four angle features, followed by their means
        # (printed in the order 60, 21, 42, 252).
        abupy.ml.show_orders_hist(orders, deg_columns)
        for name in ('deg_ang60', 'deg_ang21', 'deg_ang42', 'deg_ang252'):
            print('{0}中{1}平均值为{2:.2f}'.format(
                title, name, orders['buy_' + name].mean()))

    abu_result_tuple_train, _, _, _ = load_abu_result_tuple()
    orders_pd_train = abu_result_tuple_train.orders_pd
    # The umpire takes orders_pd as its argument.
    ump_deg = AbuUmpMainDeg(orders_pd_train)
    # Original note: df is the frame produced earlier by ump_main_make_xy
    # (see table 11-1).
    print('ump_deg.fiter.df.head():\n', ump_deg.fiter.df.head())
    # Time-consuming: roughly ten-plus minutes depending on machine and CPU.
    ump_deg.fit(brust_min=False, show=False)
    print('ump_deg.cprs:\n', ump_deg.cprs)

    # idxmax() returns the index *label* of the largest lrs value. On
    # pandas >= 1.0, Series.argmax() returns an integer position instead,
    # which cannot be used for the .loc lookup or the split('_') below.
    max_failed_key = ump_deg.cprs.lrs.idxmax()
    max_failed_cluster = ump_deg.cprs.loc[max_failed_key]
    print('失败概率最大的分类簇{0}, 失败率为{1:.2f}%, 簇交易总数{2}, 簇平均交易获利{3:.2f}%'.format(
        max_failed_key, max_failed_cluster.lrs * 100,
        max_failed_cluster.lcs, max_failed_cluster.lms * 100))

    # The part of the label before '_' is used as the key into ump_deg.rts.
    cpt = int(max_failed_key.split('_')[0])
    print('cpt:\n', cpt)
    ump_deg.show_parse_rt(ump_deg.rts[cpt])

    max_failed_cluster_orders = ump_deg.nts[max_failed_key]
    print('max_failed_cluster_orders:\n', max_failed_cluster_orders)

    _show_deg_stats(max_failed_cluster_orders, '分类簇')
    _show_deg_stats(orders_pd_train, '训练数据集')

    # 11.2.2 Filter the cluster set with the global optimum.
    brust_min = ump_deg.brust_min()
    print('brust_min:', brust_min)
    cprs = ump_deg.cprs
    llps = cprs[(cprs['lps'] <= brust_min[0]) &
                (cprs['lms'] <= brust_min[1]) &
                (cprs['lrs'] >= brust_min[2])]
    print('llps:\n', llps)

    print(ump_deg.choose_cprs_component(llps))
    ump_deg.dump_clf(llps)
def sample_112():
    """
    11.2.1 Angle main umpire, 11.2.2 filtering the cluster set with the
    global optimum.

    Fits the angle umpire on the training orders, reports the cluster with the
    highest failure rate, compares its angle features with the whole training
    set, then selects clusters with the ``brust_min`` thresholds and dumps
    them.

    :return: None
    """
    # NOTE(review): an earlier `sample_112` definition is visible above; if
    # both live in the same module, this later one is the one that is used.
    deg_columns = ['buy_deg_ang21', 'buy_deg_ang42', 'buy_deg_ang60',
                   'buy_deg_ang252']

    def _show_deg_stats(orders, title):
        # Histogram of the four angle features, followed by their means
        # (printed in the order 60, 21, 42, 252).
        ml.show_orders_hist(orders, deg_columns)
        for name in ('deg_ang60', 'deg_ang21', 'deg_ang42', 'deg_ang252'):
            print('{0}中{1}平均值为{2:.2f}'.format(
                title, name, orders['buy_' + name].mean()))

    abu_result_tuple_train, _, _, _ = load_abu_result_tuple()
    orders_pd_train = abu_result_tuple_train.orders_pd
    # The umpire takes orders_pd as its argument.
    ump_deg = AbuUmpMainDeg(orders_pd_train)
    # Original note: df is the frame produced earlier by ump_main_make_xy
    # (see table 11-1).
    print('ump_deg.fiter.df.head():\n', ump_deg.fiter.df.head())
    # Time-consuming: roughly ten-plus minutes depending on machine and CPU.
    ump_deg.fit(brust_min=False)
    print('ump_deg.cprs:\n', ump_deg.cprs)

    # idxmax() returns the index *label* of the largest lrs value. On
    # pandas >= 1.0, Series.argmax() returns an integer position instead,
    # which cannot be used for the .loc lookup or the split('_') below.
    max_failed_key = ump_deg.cprs.lrs.idxmax()
    max_failed_cluster = ump_deg.cprs.loc[max_failed_key]
    print('失败概率最大的分类簇{0}, 失败率为{1:.2f}%, 簇交易总数{2}, 簇平均交易获利{3:.2f}%'.format(
        max_failed_key, max_failed_cluster.lrs * 100,
        max_failed_cluster.lcs, max_failed_cluster.lms * 100))

    # The part of the label before '_' is used as the key into ump_deg.rts.
    cpt = int(max_failed_key.split('_')[0])
    print('cpt:\n', cpt)
    ump_deg.show_parse_rt(ump_deg.rts[cpt])

    max_failed_cluster_orders = ump_deg.nts[max_failed_key]
    print('max_failed_cluster_orders:\n', max_failed_cluster_orders)

    _show_deg_stats(max_failed_cluster_orders, '分类簇')
    _show_deg_stats(orders_pd_train, '训练数据集')

    # 11.2.2 Filter the cluster set with the global optimum.
    brust_min = ump_deg.brust_min()
    print('brust_min:', brust_min)
    cprs = ump_deg.cprs
    llps = cprs[(cprs['lps'] <= brust_min[0]) &
                (cprs['lms'] <= brust_min[1]) &
                (cprs['lrs'] >= brust_min[2])]
    print('llps:\n', llps)

    print(ump_deg.choose_cprs_component(llps))
    ump_deg.dump_clf(llps)
# Script fragment: cross-validate classifiers on several order feature sets.
# `abu_result_tuple` is expected to be defined before this point.
from abupy import AbuUmpMainMul

mul = AbuUmpMainMul.UmpMulFiter(orders_pd=abu_result_tuple.orders_pd, scaler=False)
mul.df.head()
mul().cross_val_accuracy_score()

# Random forest
mul().estimator.random_forest_classifier()
mul().cross_val_accuracy_score()

# Experiment with the historical fitted-angle features
from abupy import AbuUmpMainDeg

# The nested feature classes are spelled "...Fiter" (as with UmpMulFiter above).
deg = AbuUmpMainDeg.UmpDegFiter(orders_pd=abu_result_tuple.orders_pd)
# Use adaboost as the classifier
deg().estimator.adaboost_classifier()
deg.df.head()
deg().cross_val_accuracy_score()

# Confusion matrix distribution
deg().train_test_split_xy()

# Use more features
from abupy import AbuUmpMainFull

full = AbuUmpMainFull.UmpFullFiter(orders_pd=abu_result_tuple.orders_pd)
# Keep using adaboost
full().estimator.adaboost_classifier()
# Inspect all feature names of full