Exemple #1
0
    def set_state(self, results):
        """Load a results vector and derive the clamped LLR state from it.

        Sets ``self.pdf``, ``self.s0``, ``self.s1``, ``self.llr`` and
        ``self.T``.  When the elo model is "normalized" it also sets
        ``self.sigma_pg``, in which case "results" must have length 3 or 5
        (anything else trips an assertion).

        ``self.clamped`` is set to True when the raw LLR lies more than 3%
        beyond either bound; this method never resets it to False.
        """
        sample_count, self.pdf = LLRcalc.results_to_pdf(results)

        if self.elo_model == "normalized":
            _, variance = LLRcalc.stats(self.pdf)  # code duplication with LLRcalc
            outcomes = len(results)
            if outcomes == 5:
                # Length-5 results: the variance is doubled before the root.
                self.sigma_pg = (2 * variance) ** 0.5
            elif outcomes == 3:
                self.sigma_pg = variance ** 0.5
            else:
                assert False

        # Scores for the two hypotheses, converted from the current elo model.
        self.s0, self.s1 = map(self.elo_to_score, (self.elo0, self.elo1))

        drift, _ = self.LLR_drift_variance(self.pdf, self.s0, self.s1, None)

        # llr estimate
        self.llr = sample_count * drift
        self.T = sample_count

        # now normalize llr (if llr is not legal then the implications
        # of this are unclear)
        slope = self.llr / sample_count
        too_high = self.llr > 1.03 * self.b
        too_low = self.llr < 1.03 * self.a
        if too_high or too_low:
            self.clamped = True

        # Pick the bound that was crossed, if any; otherwise nothing to clamp.
        if self.llr < self.a:
            bound = self.a
        elif self.llr > self.b:
            bound = self.b
        else:
            return

        # Rescale the time so that the LLR sits exactly on the crossed bound.
        self.T = bound / slope
        self.llr = bound
Exemple #2
0
def SPRT_elo(R, alpha=0.05, beta=0.05, p=0.05, elo0=None, elo1=None, elo_model=None):
    """
    Calculate an elo estimate from an SPRT test.

    "R" is a dictionary with the keys "wins", "losses" and "draws" and
    optionally "pentanomial"; the pentanomial data is used when present.
    "elo0" and "elo1" are the SPRT bounds expressed in "elo_model", which
    must be "BayesElo" or "logistic" (the default None does not pass the
    assertion, so it has to be given explicitly). BayesElo bounds are
    converted to logistic elo using a draw elo estimated from "R".
    "p" is forwarded unchanged to the analytics() call.

    Returns the dictionary produced by analytics(p), with its "LLR" entry
    replaced by the value of LLRcalc.LLR_logistic and its "clamped" entry
    removed."""
    assert elo_model in ["BayesElo", "logistic"]

    # Estimate drawelo out of sample (done for both models).
    trinomial = LLRcalc.regularize([R["losses"], R["draws"], R["wins"]])
    drawelo = draw_elo_calc(trinomial)

    # Express the bounds in logistic elo.
    if elo_model == "BayesElo":
        lelo0 = bayeselo_to_elo(elo0, drawelo)
        lelo1 = bayeselo_to_elo(elo1, drawelo)
    else:
        lelo0, lelo1 = elo0, elo1

    # Build the estimator and feed it the most detailed results available.
    estimator = sprt.sprt(alpha=alpha, beta=beta, elo0=lelo0, elo1=lelo1)
    samples = R["pentanomial"] if "pentanomial" in R else trinomial
    estimator.set_state(samples)

    report = estimator.analytics(p)

    # Override the LLR approximation with the exact one
    report["LLR"] = LLRcalc.LLR_logistic(lelo0, lelo1, samples)
    del report["clamped"]
    return report
Exemple #3
0
 def __init__(self, alpha=0.05, beta=0.05, elo0=0, elo1=5):
     """
     Set up the SPRT bounds and the scores of the two hypotheses.

     self.a and self.b are log(beta / (1 - alpha)) and
     log((1 - beta) / alpha). self.s0 and self.s1 are LLRcalc.L_ applied
     to "elo0" and "elo1". self.clamped starts out False."""
     lower = math.log(beta / (1 - alpha))
     upper = math.log((1 - beta) / alpha)
     self.a, self.b = lower, upper
     self.elo0, self.elo1 = elo0, elo1
     self.s0, self.s1 = (LLRcalc.L_(bound) for bound in (elo0, elo1))
     self.clamped = False
     # Drift/variance estimator used by the other methods of this object.
     self.LLR_drift_variance = LLRcalc.LLR_drift_variance_alt2
Exemple #4
0
 def elo_to_score(self, elo):
     """
     Convert "elo", expressed in our current elo_model, to a score.

     In the "normalized" model the elo is divided by
     LLRcalc.nelo_divided_by_nt, scaled by self.sigma_pg and offset by
     0.5; in every other model the conversion is LLRcalc.L_(elo)."""
     if self.elo_model != "normalized":
         return LLRcalc.L_(elo)
     normalized_t = elo / LLRcalc.nelo_divided_by_nt
     return normalized_t * self.sigma_pg + 0.5
Exemple #5
0
 def lelo_to_elo(self, lelo):
     """
     For external use. Convert the logistic elo "lelo" to an elo
     expressed in our current elo_model.

     With the "logistic" model "lelo" is returned unchanged. Otherwise
     the score LLRcalc.L_(lelo) is centred on 0.5, divided by
     self.sigma_pg and multiplied by LLRcalc.nelo_divided_by_nt."""
     if self.elo_model == "logistic":
         return lelo
     normalized_t = (LLRcalc.L_(lelo) - 0.5) / self.sigma_pg
     return normalized_t * LLRcalc.nelo_divided_by_nt
Exemple #6
0
 def outcome_prob(self, elo):
     """
     The probability of a test with the given elo with worse outcome
     (faster fail, slower pass or a pass changed into a fail).

     "elo" is turned into a score with LLRcalc.L_. The drift and variance
     of the LLR at that score parametrize a Brownian object with bounds
     self.a and self.b, whose outcome_cdf is evaluated at the current
     state (self.T, self.llr)."""
     score = LLRcalc.L_(elo)
     drift, variance = self.LLR_drift_variance(self.pdf, self.s0, self.s1,
                                               score)
     process = Brownian(a=self.a, b=self.b, mu=drift,
                        sigma=math.sqrt(variance))
     return process.outcome_cdf(T=self.T, y=self.llr)
Exemple #7
0
def get_elo(results):
    """
    "results" is an array of length 2*n+1 with aggregated frequencies
    for n games.

    Returns a tuple (el, elo95, los): the elo of the mean score, half the
    width of the elo interval spanned by the 95% confidence interval of
    the mean score, and Phi of the standardized distance of the mean
    score from 0.5."""
    regularized = LLRcalc.regularize(results)
    games, mu, var = stats(regularized)
    stdev = math.sqrt(var)
    root_games = math.sqrt(games)

    # 95% confidence interval for mu
    lower, upper = (
        mu + Phi_inv(quantile) * stdev / root_games
        for quantile in (0.025, 0.975)
    )

    point_estimate = elo(mu)
    half_width = (elo(upper) - elo(lower)) / 2.0
    standard_error = stdev / root_games
    los = Phi((mu - 0.5) / standard_error)

    return point_estimate, half_width, los
Exemple #8
0
    def set_state(self, results):
        """Load a results vector and derive the clamped LLR state from it.

        Sets ``self.pdf``, ``self.llr`` and ``self.T``.  ``self.clamped``
        is set to True when the raw LLR lies more than 3% beyond either
        bound; this method never resets it to False.
        """
        sample_count, self.pdf = LLRcalc.results_to_pdf(results)
        drift, _ = self.LLR_drift_variance(self.pdf, self.s0, self.s1, None)

        # llr estimate
        self.llr = sample_count * drift
        self.T = sample_count

        # now normalize llr (if llr is not legal then the implications
        # of this are unclear)
        slope = self.llr / sample_count
        too_high = self.llr > 1.03 * self.b
        too_low = self.llr < 1.03 * self.a
        if too_high or too_low:
            self.clamped = True

        # Pick the bound that was crossed, if any; otherwise nothing to clamp.
        if self.llr < self.a:
            bound = self.a
        elif self.llr > self.b:
            bound = self.b
        else:
            return

        # Rescale the time so that the LLR sits exactly on the crossed bound.
        self.T = bound / slope
        self.llr = bound
Exemple #9
0
def update_SPRT(R, sprt):
    """Sequential Probability Ratio Test

sprt is a dictionary with fixed fields

'elo0', 'alpha', 'elo1', 'beta', 'elo_model', 'lower_bound', 'upper_bound'.

It also has the following fields

'llr', 'state', 'overshoot'

which are updated by this function.

Normally this function should be called after each finished game (trinomial) or
game pair (pentanomial) but it is safe to call it multiple times with the same parameters.
Skipped updates are also handled sensibly.

The meaning of the inputs and the fields is as follows.

H0: elo = elo0
H1: elo = elo1
alpha = max typeI error (reached on elo = elo0)
beta = max typeII error for elo >= elo1 (reached on elo = elo1)
'overshoot' is a dictionary with data for dynamic overshoot
estimation. The theoretical basis for this is: Siegmund - Sequential
Analysis - Corollary 8.33.  The correctness can be verified by
simulation

https://github.com/vdbergh/simul

The keys of 'overshoot' used here are 'last_update', 'ref0', 'ref1',
'm0', 'sq0', 'm1', 'sq1' and 'skipped_updates'.

R['wins'], R['losses'], R['draws'] contains the number of wins, losses and draws
R['pentanomial'] contains the pentanomial frequencies
elo_model can be either 'BayesElo' or 'logistic' ('BayesElo' is assumed when
the field is absent)

Nothing is returned: sprt is modified in place. 'state' ends up as '',
'rejected' or 'accepted'. If the sample count went backwards since the last
update, 'overshoot' is deleted and the audit field 'lost_samples' is set.
"""

    # the next two lines are superfluous, but necessary for backward compatibility
    sprt['lower_bound'] = math.log(sprt['beta'] / (1 - sprt['alpha']))
    sprt['upper_bound'] = math.log((1 - sprt['beta']) / sprt['alpha'])

    elo_model = sprt.get('elo_model', 'BayesElo')
    assert (elo_model in ['BayesElo', 'logistic'])
    elo0 = sprt['elo0']
    elo1 = sprt['elo1']

    # first deal with the legacy BayesElo/trinomial models
    R3 = LLRcalc.regularize([R['losses'], R['draws'], R['wins']])
    if elo_model == 'BayesElo':
        # estimate drawelo out of sample
        drawelo = draw_elo_calc(R3)
        # conversion of bounds to logistic elo
        lelo0, lelo1 = [bayeselo_to_elo(elo, drawelo) for elo in (elo0, elo1)]
    else:
        lelo0, lelo1 = elo0, elo1

    # Log-Likelihood Ratio
    # Pentanomial frequencies are preferred; the regularized trinomial
    # counts are the fallback.
    R_ = R.get('pentanomial', R3)
    sprt['llr'] = LLRcalc.LLR_logistic(lelo0, lelo1, R_)

    # update the overshoot data
    if 'overshoot' in sprt:
        LLR_ = sprt['llr']
        o = sprt['overshoot']
        # NOTE(review): without pentanomial data R_ is the *regularized* R3;
        # if regularize() alters the counts, the equality tests against
        # 'last_update' below may never match -- confirm.
        num_samples = sum(R_)
        if num_samples < o['last_update']:  # purge?
            sprt['lost_samples'] = o['last_update'] - num_samples  # audit
            del sprt['overshoot']  # the contract is violated
        else:
            if num_samples == o['last_update']:  # same data
                pass
            elif num_samples == o['last_update'] + 1:  # the normal case
                # 'ref0' tracks the lowest LLR seen since the last reset;
                # each new low adds its (negative) step to 'm0' and the
                # squared step to 'sq0'.
                if LLR_ < o['ref0']:
                    delta = LLR_ - o['ref0']
                    o['m0'] += delta
                    o['sq0'] += delta**2
                    o['ref0'] = LLR_
                # 'ref1' tracks the highest LLR seen since the last reset;
                # each new high adds its (positive) step to 'm1' and the
                # squared step to 'sq1'.
                if LLR_ > o['ref1']:
                    delta = LLR_ - o['ref1']
                    o['m1'] += delta
                    o['sq1'] += delta**2
                    o['ref1'] = LLR_
            else:
                # Be robust if some updates are lost: reset data collection.
                # This should not be needed anymore, but just in case...
                o['ref0'] = LLR_
                o['ref1'] = LLR_
                o['skipped_updates'] += (num_samples -
                                         o['last_update']) - 1  # audit
            o['last_update'] = num_samples

    # Overshoot corrections; both stay 0 when there is no overshoot data
    # (it may just have been deleted above) or nothing was accumulated yet.
    o0 = 0
    o1 = 0
    if 'overshoot' in sprt:
        o = sprt['overshoot']
        # sq / m / 2 per side; the guards avoid a division by zero.
        o0 = -o['sq0'] / o['m0'] / 2 if o['m0'] != 0 else 0
        o1 = o['sq1'] / o['m1'] / 2 if o['m1'] != 0 else 0

    # now check the stop condition
    # o0 is added to the lower bound and o1 subtracted from the upper bound.
    sprt['state'] = ''
    if sprt['llr'] < sprt['lower_bound'] + o0:
        sprt['state'] = 'rejected'
    elif sprt['llr'] > sprt['upper_bound'] - o1:
        sprt['state'] = 'accepted'
Exemple #10
0
def update_SPRT(R, sprt):
    """Sequential Probability Ratio Test

    sprt is a dictionary with fixed fields

    'elo0', 'alpha', 'elo1', 'beta', 'elo_model', 'lower_bound', 'upper_bound', 'batch_size'

    It also has the following fields

    'llr', 'state', 'overshoot'

    which are updated by this function.

    Normally this function should be called each time 'batch_size' games (trinomial) or
    game pairs (pentanomial) have been completed but it is safe to call it multiple times
    with the same parameters. The main purpose of this is to be able to recalculate
    the LLR for old tests.

    In the unlikely event of a server crash it is possible that some updates may be missed
    but this situation is also handled sensibly.

    The meaning of the other inputs and the fields is as follows.

    H0: elo = elo0
    H1: elo = elo1
    alpha = max typeI error (reached on elo = elo0)
    beta = max typeII error for elo >= elo1 (reached on elo = elo1)
    'overshoot' is a dictionary with data for dynamic overshoot
    estimation. The theoretical basis for this is: Siegmund - Sequential
    Analysis - Corollary 8.33.  The correctness can be verified by
    simulation

    https://github.com/vdbergh/simul

    The keys of 'overshoot' used here are 'last_update', 'ref0', 'ref1',
    'm0', 'sq0', 'm1', 'sq1' and 'skipped_updates'.

    R['wins'], R['losses'], R['draws'] contains the number of wins, losses and draws
    (a missing count is treated as 0)
    R['pentanomial'] contains the pentanomial frequencies
    elo_model can be either 'BayesElo', 'logistic' or 'normalized'
    ('BayesElo' is assumed when the field is absent; 'batch_size' defaults to 1)

    Nothing is returned: sprt is modified in place. 'state' ends up as '',
    'rejected' or 'accepted'. 'overshoot' is deleted when the sample count is
    not a multiple of 'batch_size' (audit field 'illegal_update') or when it
    went backwards since the last update (audit field 'lost_samples')."""

    # the next two lines are superfluous, but unfortunately necessary for backward
    # compatibility with old tests
    sprt["lower_bound"] = math.log(sprt["beta"] / (1 - sprt["alpha"]))
    sprt["upper_bound"] = math.log((1 - sprt["beta"]) / sprt["alpha"])

    elo_model = sprt.get("elo_model", "BayesElo")
    assert elo_model in ["BayesElo", "logistic", "normalized"]
    elo0 = sprt["elo0"]
    elo1 = sprt["elo1"]

    # first deal with the legacy BayesElo/trinomial models
    R3 = [R.get("losses", 0), R.get("draws", 0), R.get("wins", 0)]
    if elo_model == "BayesElo":
        # estimate drawelo out of sample
        R3_ = LLRcalc.regularize(R3)
        drawelo = draw_elo_calc(R3_)
        # conversion of bounds to logistic elo
        elo0, elo1 = [bayeselo_to_elo(elo, drawelo) for elo in (elo0, elo1)]
        # The bounds are logistic from here on, so continue as that model.
        elo_model = "logistic"

    # Pentanomial frequencies are preferred; the raw (not regularized)
    # trinomial counts are the fallback.
    R_ = R.get("pentanomial", R3)

    batch_size = sprt.get("batch_size", 1)

    # sanity check on batch_size
    if sum(R_) % batch_size != 0:
        sprt["illegal_update"] = sum(R_)  # audit
        if "overshoot" in sprt:
            del sprt["overshoot"]  # the contract is violated

    # Log-Likelihood Ratio
    assert elo_model in ["logistic", "normalized"]
    if elo_model == "logistic":
        sprt["llr"] = LLRcalc.LLR_logistic(elo0, elo1, R_)
    else:
        sprt["llr"] = LLRcalc.LLR_normalized(elo0, elo1, R_)

    # update the overshoot data
    if "overshoot" in sprt:
        LLR_ = sprt["llr"]
        o = sprt["overshoot"]
        num_samples = sum(R_)
        if num_samples < o["last_update"]:  # purge?
            sprt["lost_samples"] = o["last_update"] - num_samples  # audit
            del sprt["overshoot"]  # the contract is violated
        else:
            if num_samples == o["last_update"]:  # same data
                pass
            elif num_samples == o[
                    "last_update"] + batch_size:  # the normal case
                # "ref0" tracks the lowest LLR seen since the last reset;
                # each new low adds its (negative) step to "m0" and the
                # squared step to "sq0".
                if LLR_ < o["ref0"]:
                    delta = LLR_ - o["ref0"]
                    o["m0"] += delta
                    o["sq0"] += delta**2
                    o["ref0"] = LLR_
                # "ref1" tracks the highest LLR seen since the last reset;
                # each new high adds its (positive) step to "m1" and the
                # squared step to "sq1".
                if LLR_ > o["ref1"]:
                    delta = LLR_ - o["ref1"]
                    o["m1"] += delta
                    o["sq1"] += delta**2
                    o["ref1"] = LLR_
            else:
                # Be robust if some updates are lost: reset data collection.
                # This should not be needed anymore, but just in case...
                o["ref0"] = LLR_
                o["ref1"] = LLR_
                # NOTE(review): this counts skipped samples minus one, not
                # skipped batches -- confirm this is intended when
                # batch_size > 1.
                o["skipped_updates"] += (num_samples -
                                         o["last_update"]) - 1  # audit
            o["last_update"] = num_samples

    # Overshoot corrections; both stay 0 when there is no overshoot data
    # (it may just have been deleted above) or nothing was accumulated yet.
    o0 = 0
    o1 = 0
    if "overshoot" in sprt:
        o = sprt["overshoot"]
        # sq / m / 2 per side; the guards avoid a division by zero.
        o0 = -o["sq0"] / o["m0"] / 2 if o["m0"] != 0 else 0
        o1 = o["sq1"] / o["m1"] / 2 if o["m1"] != 0 else 0

    # now check the stop condition
    # o0 is added to the lower bound and o1 subtracted from the upper bound.
    sprt["state"] = ""
    if sprt["llr"] < sprt["lower_bound"] + o0:
        sprt["state"] = "rejected"
    elif sprt["llr"] > sprt["upper_bound"] - o1:
        sprt["state"] = "accepted"