def get_test_result_3class(variant, clf, pool: Pool):
    """Score a fitted three-class classifier on ``pool`` and build a ``cco.Result``.

    For every row of ``clf.predict_proba(pool)`` column 2 is compared with
    ``variant.min_proba`` and column 0 with ``variant.get_min_neg_proba()``.
    A row passing the positive threshold is recorded in the positive tally
    as a hit when its label equals 1; otherwise a row passing the negative
    threshold is recorded in the negative tally as a hit when its label
    equals -1. A threshold of ``None`` disables the corresponding side.

    Returns:
        ``cco.Result`` carrying both tallies, the combined ratio, the label
        count, the classifier score, the total profit and the positive-side
        profit.
    """
    class_probs = clf.predict_proba(pool)
    pos_stat, neg_stat = st.WinLoss(), st.WinLoss()
    pos_threshold = variant.min_proba
    neg_threshold = variant.get_min_neg_proba()

    for row, label in zip(class_probs, pool.get_label()):
        # The positive side has priority: the negative threshold is only
        # consulted for rows that did not pass the positive one.
        if pos_threshold is not None and row[2] >= pos_threshold:
            pos_stat.hit(label == 1)
            continue
        if neg_threshold is not None and row[0] >= neg_threshold:
            neg_stat.hit(label == -1)

    ratios = variant.profit_ratios
    # A falsy tally contributes zero profit.
    pos_profit = (
        round(pos_stat.size * (pos_stat.ratio - ratios.pos_ratio), 3)
        if pos_stat
        else 0.0
    )
    neg_profit = (
        round(neg_stat.size * (neg_stat.ratio - ratios.neg_ratio), 3)
        if neg_stat
        else 0.0
    )

    return cco.Result(
        name=variant.name,
        mean=cco.fmt((pos_stat + neg_stat).ratio),
        leny=len(pool.get_label()),
        scr=cco.fmt(clf.score(pool)),
        poswl=pos_stat,
        negwl=neg_stat,
        profit=pos_profit + neg_profit,
        pos_profit=pos_profit,
    )
def get_result(variant, clf, X_test, y_test):
    """Score a fitted binary classifier on a test set and build a ``Result``.

    For every row of ``clf.predict_proba(X_test)`` column 1 is compared with
    ``variant.min_probas.pos`` and column 0 with ``variant.min_probas.neg``.
    A row passing the positive threshold is recorded in the positive tally
    as a hit when its label equals 1; otherwise a row passing the negative
    threshold is recorded in the negative tally as a hit when its label
    equals 0. A threshold of ``None`` disables the corresponding side.

    Returns:
        ``Result`` carrying both tallies, the combined ratio, ``len(y_test)``,
        the classifier score, the total profit and the positive-side profit.
    """
    class_probs = clf.predict_proba(X_test)
    pos_stat, neg_stat = st.WinLoss(), st.WinLoss()
    pos_threshold = variant.min_probas.pos
    neg_threshold = variant.min_probas.neg

    for row, label in zip(class_probs, y_test):
        # The positive side has priority: the negative threshold is only
        # consulted for rows that did not pass the positive one.
        if pos_threshold is not None and row[1] >= pos_threshold:
            pos_stat.hit(label == 1)
            continue
        if neg_threshold is not None and row[0] >= neg_threshold:
            neg_stat.hit(label == 0)

    ratios = variant.profit_ratios
    # A falsy tally contributes zero profit.
    pos_profit = (
        round(pos_stat.size * (pos_stat.ratio - ratios.pos_ratio), 3)
        if pos_stat
        else 0.0
    )
    neg_profit = (
        round(neg_stat.size * (neg_stat.ratio - ratios.neg_ratio), 3)
        if neg_stat
        else 0.0
    )

    return Result(
        name=variant.name,
        mean=fmt((pos_stat + neg_stat).ratio),
        leny=len(y_test),
        scr=fmt(clf.score(X_test, y_test)),
        poswl=pos_stat,
        negwl=neg_stat,
        profit=pos_profit + neg_profit,
        pos_profit=pos_profit,
    )
def make_struct_winloss_dict():
    """Return a fixed mapping of ``co.Struct`` keys to ``st.WinLoss`` values.

    The five entries range from an unconstrained ``co.Struct()`` to one
    constrained by level, surface and round.
    """
    return {
        co.Struct(): st.WinLoss(9999, 1),
        co.Struct(surface="Hard", level="main"): st.WinLoss(999, 1),
        co.Struct(level="masters", surface="Clay"): st.WinLoss(70, 30),
        co.Struct(level="main", rnd="First"): st.WinLoss(66, 34),
        co.Struct(level="chal", surface="Grass", rnd="1/4"): st.WinLoss(0, 100),
    }
def series_bin_ratio(series, pos_value=1, neg_value=0):
    """Return the ``st.WinLoss`` ratio of ``pos_value`` versus ``neg_value`` counts.

    ``series`` may be a pandas Series or anything ``pd.Series`` accepts.
    Occurrences are counted with ``value_counts()``; a value missing from the
    counts contributes 0. When neither value is present the function returns
    ``None``.
    """
    values = series if isinstance(series, pd.Series) else pd.Series(series)
    counts = values.value_counts()

    has_pos = pos_value in counts
    has_neg = neg_value in counts
    if not (has_pos or has_neg):
        return None

    n_pos = counts[pos_value] if has_pos else 0
    n_neg = counts[neg_value] if has_neg else 0
    return st.WinLoss(n_pos, n_neg).ratio
def get_dif_ratio(sex, pid1, pid2, min_date=None, max_date=None):
    """Return ``winloss_to_float`` of player 2's tally minus that of player 1.

    Walks ``results_dict[sex]``; dates before ``min_date`` are skipped and
    the walk stops at the first date after ``max_date`` (either bound may be
    ``None`` to disable it). For each match a player gets a win when listed
    as ``first_id`` and a loss when listed as ``second_id``.
    """
    tally1, tally2 = st.WinLoss(), st.WinLoss()
    tracked = ((pid1, tally1), (pid2, tally2))

    for date, match_results in results_dict[sex].items():
        if min_date is not None and date < min_date:
            continue
        if max_date is not None and date > max_date:
            # Stopping here relies on the dict being iterated in ascending
            # date order -- NOTE(review): confirm against how it is built.
            break
        for match_res in match_results:
            for pid, tally in tracked:
                if match_res.first_id == pid:
                    tally.add_win(1)
                elif match_res.second_id == pid:
                    tally.add_loss(1)

    return winloss_to_float(tally2) - winloss_to_float(tally1)
def random_train(variant: cco.Variant, msg="", split=True, plot=False):
    """Train ``variant`` repeatedly over random seeds/states and log averages.

    For every seed from ``random_args.iter_seeds()`` and every state from
    ``random_args.iter_states()`` the data is rebuilt with ``fill_data``, a
    classifier is fitted with ``variant.make_clf_fit`` and precision,
    accuracy and ROC AUC are measured on ``data.test``. Feature importances
    and the win/loss tallies from ``variant.make_test_result`` are
    accumulated across runs.

    After all runs the averages are logged; they are divided by
    ``random_args.space_size()`` (presumably the number of seed/state
    combinations -- confirm). Nothing is returned.
    """

    def _space_mean(values):
        return sum(values) / random_args.space_size()

    importance_totals = defaultdict(float)
    prc_list, acc_list, auc_list = [], [], []
    treecnt_list, lrate_list = [], []
    all_wl = st.WinLoss()
    all_test_size = 0

    for seed in random_args.iter_seeds():
        put_seed(seed)
        variant.set_random_state(seed)
        for random_state in random_args.iter_states():
            log.info(f"random_state={random_state} start learning")
            data, _ = fill_data(
                variant,
                split=split,
                is_shuffle=args.shuffle,
                random_state=random_state,
            )
            clf = variant.make_clf_fit(
                data, metric_name, random_seed=seed, plot=plot)

            feature_names = variant.feature_names.get_list()
            for name, imp in variant.get_features_importance(
                    feature_names).items():
                importance_totals[name] += imp

            prc_list.append(
                precision_score(data.test.y, clf.predict(data.test.X)))
            acc_list.append(
                accuracy_score(data.test.y, clf.predict(data.test.X)))
            auc_list.append(
                roc_auc_score(data.test.y,
                              clf.predict_proba(data.test.X)[:, 1]))
            if variant.is_cb_native():
                treecnt_list.append(clf.tree_count_)
                lrate_list.append(clf.learning_rate_)
            # Running mean over the runs finished so far.
            log.info(f"gomean acc {sum(acc_list) / len(acc_list)}")

            res = variant.make_test_result(data)
            all_wl += res.poswl + res.negwl
            all_test_size += data.test.X.shape[0]

    log.info(f"******************************************\n"
             f"*****{msg}*** {variant.name} results******\n")
    log.info(f"mean_prc {_space_mean(prc_list)}")
    log.info(f"mean_acc {_space_mean(acc_list)}")
    log.info(f"mean_auc {_space_mean(auc_list)}")
    if variant.is_cb_native():
        log.info(f"treecnt {_space_mean(treecnt_list)}")
        log.info(f"lrate {_space_mean(lrate_list)}")
    log.info(f"all_wl {all_wl.ratio_size_str(precision=4)} "
             f"ratio {round(all_wl.size / all_test_size, 3)}")

    log.info("all_name_imp:")
    # Mean importance per feature, largest first.
    all_name_imp_list = sorted(
        ((name, total / random_args.space_size())
         for name, total in importance_totals.items()),
        key=lambda it: it[1],
        reverse=True,
    )
    log.info("\n" + pprint.pformat(all_name_imp_list))
def decided_winloss_by_set2_winner(sex, soft_level, date, set1_score, set2_score):
    """Sum the stored WinLoss entries for a set1/set2 score pair before ``date``.

    The two scores are first passed through ``_make_set2winner_orient``.
    ``data_dict[(sex, soft_level)]`` is then walked and the entry for the
    oriented score pair is added for each (year, weeknum) key, stopping at
    the first key that is not earlier than the one derived from ``date``.
    """
    oriented_set1, oriented_set2 = _make_set2winner_orient(set1_score, set2_score)
    upper_ywn = tt.get_year_weeknum(date)
    total = st.WinLoss()
    score_key = (oriented_set1, oriented_set2)

    for ywn, by_score in data_dict[(sex, soft_level)].items():
        if ywn >= upper_ywn:
            # Stopping here relies on ascending key order --
            # NOTE(review): confirm against how data_dict is built.
            break
        total += by_score[score_key]
    return total
def player_winloss(sex, ident, min_date=None, max_date=None, as_float=False):
    """Tally wins and losses of player ``ident`` from ``results_dict[sex]``.

    Dates before ``min_date`` are skipped and the walk stops at the first
    date after ``max_date`` (either bound may be ``None`` to disable it).
    The player gets a win when listed as ``first_id`` of a match and a loss
    when listed as ``second_id``.

    Returns:
        The ``st.WinLoss`` tally, or ``winloss_to_float`` of it when
        ``as_float`` is true.
    """
    tally = st.WinLoss()
    for date, match_results in results_dict[sex].items():
        if min_date is not None and date < min_date:
            continue
        if max_date is not None and date > max_date:
            # Stopping here relies on ascending date order --
            # NOTE(review): confirm against how results_dict is built.
            break
        for match_res in match_results:
            if match_res.first_id == ident:
                tally.add_win(1)
            elif match_res.second_id == ident:
                tally.add_loss(1)

    if as_float:
        return winloss_to_float(tally)
    return tally
def player_winloss(sex, aspect, ident, max_size, min_date=None, max_date=None):
    """Return a WinLoss of up to ``max_size`` entries for player ``ident``.

    Dates are treated as the semi-closed range [min_date, max_date).
    Entries come from ``recovery_dict`` when ``aspect == RECOVERY_ASPECT``
    and from ``keep_dict`` otherwise. Dates at or past ``max_date`` are
    skipped and the walk stops at the first date before ``min_date``.
    """
    tally = st.WinLoss()
    source = recovery_dict if aspect == RECOVERY_ASPECT else keep_dict

    for date, match_results_list in source[sex].items():
        if max_date is not None and date >= max_date:
            continue
        if min_date is not None and date < min_date:
            # Stopping here relies on descending date order --
            # NOTE(review): confirm against how the dicts are built.
            break
        for plr_id, set2_win in match_results_list:
            if plr_id != ident:
                continue
            tally.hit(set2_win)
            if tally.size >= max_size:
                return tally
    return tally
def test_winloss_compare(self):
    """Check WinLoss ordering and equality.

    ``WinLoss(1, 3)`` versus ``WinLoss(2, 5)`` goes through the
    ``_strict_lt_compare`` helper, and ``WinLoss(1, 3)`` must compare equal
    to ``WinLoss(10, 30)``.
    """
    lower, higher = st.WinLoss(1, 3), st.WinLoss(2, 5)
    self._strict_lt_compare(lower, higher)

    base, scaled = st.WinLoss(1, 3), st.WinLoss(10, 30)
    self.assertEqual(base, scaled)
def __init__(self):
    """Start with a zero money delta and a default-constructed WinLoss."""
    self.money_delta: float = 0.0
    self.win_loss = st.WinLoss()