def get_test_result_3class(variant, clf, pool: Pool):
    """Evaluate a three-class classifier on ``pool`` and build a ``cco.Result``.

    Every row of ``clf.predict_proba(pool)`` is paired with its label from
    ``pool.get_label()``. The value at index 2 is checked against the positive
    threshold first; only if that check fails is the value at index 0 checked
    against the negative threshold. A positive pick counts as a win when the
    label equals 1, a negative pick when the label equals -1. Rows passing
    neither threshold are ignored.

    Args:
        variant: provides ``name``, ``min_proba``, ``get_min_neg_proba()`` and
            ``profit_ratios`` (``pos_ratio`` / ``neg_ratio``). A threshold of
            ``None`` disables that side.
        clf: fitted classifier exposing ``predict_proba`` and ``score``.
        pool: data set exposing ``get_label()``.

    Returns:
        A ``cco.Result`` holding both tallies, the combined ratio, the label
        count, the classifier score, the total profit and the positive-side
        profit. The negative-side profit is only included in the total.
    """
    probabilities = clf.predict_proba(pool)
    pos_tally, neg_tally = st.WinLoss(), st.WinLoss()
    pos_threshold = variant.min_proba
    neg_threshold = variant.get_min_neg_proba()

    for row, label in zip(probabilities, pool.get_label()):
        # The positive side has priority: a row is never counted on both sides.
        if pos_threshold is not None and row[2] >= pos_threshold:
            pos_tally.hit(label == 1)
            continue
        if neg_threshold is not None and row[0] >= neg_threshold:
            neg_tally.hit(label == -1)

    ratios = variant.profit_ratios
    # The ratio attributes are read only for a non-empty (truthy) tally.
    pos_profit = (
        round(pos_tally.size * (pos_tally.ratio - ratios.pos_ratio), 3)
        if pos_tally
        else 0.0
    )
    neg_profit = (
        round(neg_tally.size * (neg_tally.ratio - ratios.neg_ratio), 3)
        if neg_tally
        else 0.0
    )

    return cco.Result(
        name=variant.name,
        mean=cco.fmt((pos_tally + neg_tally).ratio),
        leny=len(pool.get_label()),
        scr=cco.fmt(clf.score(pool)),
        poswl=pos_tally,
        negwl=neg_tally,
        profit=pos_profit + neg_profit,
        pos_profit=pos_profit,
    )
def get_result(variant, clf, X_test, y_test):
    """Evaluate a binary classifier on a test set and build a ``Result``.

    Every row of ``clf.predict_proba(X_test)`` is paired with its label from
    ``y_test``. The value at index 1 is checked against the positive threshold
    first; only if that check fails is the value at index 0 checked against
    the negative threshold. A positive pick counts as a win when the label
    equals 1, a negative pick when the label equals 0.

    Args:
        variant: provides ``name``, ``min_probas`` (``pos`` / ``neg``, either
            may be ``None`` to disable that side) and ``profit_ratios``
            (``pos_ratio`` / ``neg_ratio``).
        clf: fitted classifier exposing ``predict_proba`` and ``score``.
        X_test: test features.
        y_test: test labels, iterated in step with the probability rows.

    Returns:
        A ``Result`` holding both tallies, the combined ratio, the label
        count, the classifier score, the total profit and the positive-side
        profit. The negative-side profit is only included in the total.
    """
    probabilities = clf.predict_proba(X_test)
    pos_tally, neg_tally = st.WinLoss(), st.WinLoss()
    pos_threshold = variant.min_probas.pos
    neg_threshold = variant.min_probas.neg

    for row, label in zip(probabilities, y_test):
        # The positive side has priority: a row is never counted on both sides.
        if pos_threshold is not None and row[1] >= pos_threshold:
            pos_tally.hit(label == 1)
            continue
        if neg_threshold is not None and row[0] >= neg_threshold:
            neg_tally.hit(label == 0)

    ratios = variant.profit_ratios
    # The ratio attributes are read only for a non-empty (truthy) tally.
    pos_profit = (
        round(pos_tally.size * (pos_tally.ratio - ratios.pos_ratio), 3)
        if pos_tally
        else 0.0
    )
    neg_profit = (
        round(neg_tally.size * (neg_tally.ratio - ratios.neg_ratio), 3)
        if neg_tally
        else 0.0
    )

    return Result(
        name=variant.name,
        mean=fmt((pos_tally + neg_tally).ratio),
        leny=len(y_test),
        scr=fmt(clf.score(X_test, y_test)),
        poswl=pos_tally,
        negwl=neg_tally,
        profit=pos_profit + neg_profit,
        pos_profit=pos_profit,
    )
Example #3
0
 def make_struct_winloss_dict():
     """Build a fixed mapping from ``co.Struct`` keys to ``st.WinLoss`` values.

     The keys range from an empty ``co.Struct()`` to structs restricted by
     surface, level and round; entries are inserted in the order listed.
     """
     return {
         co.Struct(): st.WinLoss(9999, 1),
         co.Struct(surface="Hard", level="main"): st.WinLoss(999, 1),
         co.Struct(level="masters", surface="Clay"): st.WinLoss(70, 30),
         co.Struct(level="main", rnd="First"): st.WinLoss(66, 34),
         co.Struct(level="chal", surface="Grass", rnd="1/4"): st.WinLoss(
             0, 100),
     }
def series_bin_ratio(series, pos_value=1, neg_value=0):
    """Return the ``st.WinLoss`` ratio of ``pos_value`` vs ``neg_value`` counts.

    ``series`` may be a pandas Series or anything ``pd.Series`` accepts.
    Occurrences of ``pos_value`` are counted as wins and occurrences of
    ``neg_value`` as losses; a value that does not occur counts as zero.
    For 0/1 data this is presumably n1 / (n1 + n0); the exact formula is
    defined by ``st.WinLoss.ratio``.

    Returns:
        The ratio, or ``None`` when neither value occurs in ``series``.
    """
    as_series = series if isinstance(series, pd.Series) else pd.Series(series)
    counts = as_series.value_counts()

    has_pos = pos_value in counts
    has_neg = neg_value in counts
    if not (has_pos or has_neg):
        return None

    wins = counts[pos_value] if has_pos else 0
    losses = counts[neg_value] if has_neg else 0
    return st.WinLoss(wins, losses).ratio
def get_dif_ratio(sex, pid1, pid2, min_date=None, max_date=None):
    """Return ``pid2``'s win/loss value minus ``pid1``'s over a date window.

    Walks ``results_dict[sex]`` (date -> match results). For each match a
    player gets a win when they are ``first_id`` and a loss when they are
    ``second_id``. Dates before ``min_date`` are skipped and iteration stops
    at the first date after ``max_date``, which presumably relies on the
    dates being stored in ascending order.

    Returns:
        ``winloss_to_float(<pid2 tally>) - winloss_to_float(<pid1 tally>)``.
    """
    first_tally, second_tally = st.WinLoss(), st.WinLoss()
    tracked = ((pid1, first_tally), (pid2, second_tally))

    for date, match_results in results_dict[sex].items():
        too_early = min_date is not None and date < min_date
        if too_early:
            continue
        too_late = max_date is not None and date > max_date
        if too_late:
            break
        for match_res in match_results:
            # Each tracked player is checked independently against the match.
            for player_id, tally in tracked:
                if match_res.first_id == player_id:
                    tally.add_win(1)
                elif match_res.second_id == player_id:
                    tally.add_loss(1)

    return winloss_to_float(second_tally) - winloss_to_float(first_tally)
Example #6
0
def random_train(variant: cco.Variant, msg="", split=True, plot=False):
    """Fit and evaluate ``variant`` for every seed / random state and log a summary.

    For each seed from ``random_args.iter_seeds()`` and each state from
    ``random_args.iter_states()`` the data is rebuilt with ``fill_data``, a
    classifier is fitted with ``variant.make_clf_fit`` and precision,
    accuracy and ROC AUC are computed on ``data.test``. Feature importances
    and the win/loss tallies from ``variant.make_test_result`` are accumulated
    across runs, then the averaged figures are written to ``log``.

    Args:
        variant: model variant to train; its random state is reset per seed.
        msg: free text inserted into the summary header line.
        split: forwarded to ``fill_data``.
        plot: forwarded to ``variant.make_clf_fit``.

    Returns:
        None. All results are reported through ``log``.
    """
    # feature name -> importance summed over all runs
    all_name_imp = defaultdict(lambda: 0.0)
    prc_list, acc_list, auc_list, treecnt_list, lrate_list = [], [], [], [], []
    all_wl = st.WinLoss()
    all_test_size = 0

    for seed in random_args.iter_seeds():
        put_seed(seed)
        variant.set_random_state(seed)

        for random_state in random_args.iter_states():
            log.info(f"random_state={random_state} start learning")
            data, _ = fill_data(
                variant,
                split=split,
                is_shuffle=args.shuffle,
                random_state=random_state,
            )
            clf = variant.make_clf_fit(data,
                                       metric_name,
                                       random_seed=seed,
                                       plot=plot)
            name_imp = variant.get_features_importance(
                variant.feature_names.get_list())
            for name, imp in name_imp.items():
                all_name_imp[name] += imp
            # Test-set predictions are recomputed for each metric.
            prec = precision_score(data.test.y, clf.predict(data.test.X))
            acc = accuracy_score(data.test.y, clf.predict(data.test.X))
            # AUC uses the probability column at index 1.
            auc = roc_auc_score(data.test.y,
                                clf.predict_proba(data.test.X)[:, 1])
            prc_list.append(prec)
            acc_list.append(acc)
            auc_list.append(auc)
            # Tree count and learning rate are collected only when the
            # variant reports is_cb_native().
            if variant.is_cb_native():
                treecnt_list.append(clf.tree_count_)
                lrate_list.append(clf.learning_rate_)
            # Running mean of accuracy over the runs completed so far.
            log.info(f"gomean acc {sum(acc_list) / len(acc_list)}")
            res = variant.make_test_result(data)
            all_wl += res.poswl + res.negwl
            all_test_size += data.test.X.shape[0]

    # NOTE(review): the averages below divide by random_args.space_size(),
    # presumably the number of (seed, state) runs above - confirm.
    log.info(f"******************************************\n"
             f"*****{msg}*** {variant.name} results******\n")
    log.info(f"mean_prc {sum(prc_list) / random_args.space_size()}")
    log.info(f"mean_acc {sum(acc_list) / random_args.space_size()}")
    log.info(f"mean_auc {sum(auc_list) / random_args.space_size()}")
    if variant.is_cb_native():
        log.info(f"treecnt {sum(treecnt_list) / random_args.space_size()}")
        log.info(f"lrate {sum(lrate_list) / random_args.space_size()}")
    # Second figure: accumulated tally size relative to the total number of
    # test rows seen across all runs.
    log.info(f"all_wl {all_wl.ratio_size_str(precision=4)} "
             f"ratio {round(all_wl.size / all_test_size, 3)}")
    log.info("all_name_imp:")
    all_name_imp_list = [(k, v / random_args.space_size())
                         for k, v in all_name_imp.items()]
    # Highest averaged importance first.
    all_name_imp_list.sort(key=lambda it: it[1], reverse=True)
    log.info("\n" + pprint.pformat(all_name_imp_list))
Example #7
0
def decided_winloss_by_set2_winner(sex, soft_level, date, set1_score,
                                   set2_score):
    """Accumulate the stored WinLoss for a set-score pair before ``date``.

    The two set scores are first re-oriented by ``_make_set2winner_orient``.
    The per-week tallies in ``data_dict[(sex, soft_level)]`` for that score
    pair are then added up, stopping at the first year/week key that is not
    earlier than the year/week of ``date``. This presumably relies on the
    keys being stored in ascending order.

    Returns:
        The accumulated ``st.WinLoss``.
    """
    oriented_set1, oriented_set2 = _make_set2winner_orient(
        set1_score, set2_score)
    cutoff_ywn = tt.get_year_weeknum(date)
    score_key = (oriented_set1, oriented_set2)

    accumulated = st.WinLoss()
    for year_weeknum, tally_by_score in data_dict[(sex, soft_level)].items():
        if year_weeknum >= cutoff_ywn:
            break
        accumulated += tally_by_score[score_key]
    return accumulated
def player_winloss(sex, ident, min_date=None, max_date=None, as_float=False):
    """Tally wins and losses of player ``ident`` within a date window.

    Walks ``results_dict[sex]`` (date -> match results). The player gets a
    win when they are a match's ``first_id`` and a loss when they are its
    ``second_id``. Dates before ``min_date`` are skipped and iteration stops
    at the first date after ``max_date``, which presumably relies on the
    dates being stored in ascending order.

    Returns:
        The ``st.WinLoss`` tally, or ``winloss_to_float(tally)`` when
        ``as_float`` is true.
    """
    tally = st.WinLoss()
    for date, match_results in results_dict[sex].items():
        too_early = min_date is not None and date < min_date
        if too_early:
            continue
        too_late = max_date is not None and date > max_date
        if too_late:
            break
        for match_res in match_results:
            if match_res.first_id == ident:
                tally.add_win(1)
            elif match_res.second_id == ident:
                tally.add_loss(1)

    if as_float:
        return winloss_to_float(tally)
    return tally
def player_winloss(sex, aspect, ident, max_size, min_date=None, max_date=None):
    """Return a WinLoss for ``ident`` over the half-open range [min_date, max_date).

    The source mapping is ``recovery_dict`` when ``aspect`` equals
    ``RECOVERY_ASPECT`` and ``keep_dict`` otherwise. Dates at or after
    ``max_date`` are skipped and iteration stops at the first date before
    ``min_date``, which presumably relies on the dates being stored in
    descending order. Collection ends early once the tally holds
    ``max_size`` results.
    """
    tally = st.WinLoss()
    source = recovery_dict if aspect == RECOVERY_ASPECT else keep_dict
    for date, match_results_list in source[sex].items():
        not_yet_in_range = max_date is not None and date >= max_date
        if not_yet_in_range:
            continue
        past_range = min_date is not None and date < min_date
        if past_range:
            break
        for player_id, set2_win in match_results_list:
            if player_id != ident:
                continue
            tally.hit(set2_win)
            # Stop as soon as enough results have been collected.
            if tally.size >= max_size:
                return tally
    return tally
    def test_winloss_compare(self):
        """Check strict ordering and equality of ``st.WinLoss`` values."""
        lesser, greater = st.WinLoss(1, 3), st.WinLoss(2, 5)
        self._strict_lt_compare(lesser, greater)

        # Tallies with the same win/loss proportion are expected to be equal.
        small_sample, large_sample = st.WinLoss(1, 3), st.WinLoss(10, 30)
        self.assertEqual(small_sample, large_sample)
Example #11
0
 def __init__(self):
     """Start with a zero money delta and an empty ``st.WinLoss`` tally."""
     # Starts at 0.0.
     self.money_delta = 0.0
     # Starts as a default-constructed st.WinLoss.
     self.win_loss = st.WinLoss()