Beispiel #1
0
 def ppf(self, att, uni):
     """Percent-point function (inverse CDF) of the beta-binomial.

     Returns the smallest success count whose cumulative probability,
     over `att` attempts with shape parameters self.ab, reaches `uni`.
     """
     # with zero attempts the only possible outcome is zero successes
     if att == 0:
         return 0
     total = dist_fit.beta_binomial(0, att, *self.ab)
     k = 0
     # accumulate pmf terms until the running CDF crosses the quantile
     while total < uni:
         k += 1
         total += dist_fit.beta_binomial(k, att, *self.ab)
     return k
Beispiel #2
0
 def _mae(self, data, weights=None):
     """Expected mean absolute error of each observation under its
     per-observation beta-binomial fit (parameters from self._get_abs).

     Returns an array, one expected |error| per element of `data`.
     """
     assert((data >= 0).all() and (data <= self.n).all())
     alphas, betas = self._get_abs(data, weights)
     # finite outcome domain 0..n makes the expectation an exact finite sum
     domain = np.arange(0, self.n + 1)
     errs = [
         np.sum(dist_fit.beta_binomial(domain, self.n, a, b) * np.abs(domain - d))
         for d, a, b in zip(data, alphas, betas)
     ]
     return np.array(errs)
Beispiel #3
0
 def cdf(self, succ, att):
     """Inclusive CDF: probability mass at or below `succ` for `att` attempts.

     Note: these CDFs will not be flat, since most results are 0 and that
     is already most of the way up the CDF; this inclusive definition is
     the one wanted for analyzing the correlations.
     """
     total = 0.
     k = 0
     # a while-loop (rather than range) so a non-integer `succ` still works
     while k <= succ:
         total += dist_fit.beta_binomial(k, att, *self.ab)
         k += 1
     return total
Beispiel #4
0
 def _mse(self, data, weights=None):
     """Expected mean squared error of each observation under its
     per-observation beta-binomial fit (parameters from self._get_abs).

     Returns an array, one expected squared error per element of `data`.
     """
     assert((data >= 0).all() and (data <= self.n).all())
     alphas, betas = self._get_abs(data, weights)
     # beta-binomial isn't in scipy (open ticket; the CDF is hard to
     # implement) — but the domain 0..n is finite, so summing manually
     # is cheap and exact
     domain = np.arange(0, self.n + 1)
     errs = [
         np.sum(dist_fit.beta_binomial(domain, self.n, a, b) * (domain - d) ** 2)
         for d, a, b in zip(data, alphas, betas)
     ]
     return np.array(errs)
Beispiel #5
0
    # NOTE(review): fragment of a larger routine — ainc/binc/llpdf/cov,
    # data_gp_rook, data_gp, maxgames, ark/brk, gp_stat and pos are bound
    # earlier, outside this view.
    log.info(f'all: alpha = {ainc}, beta = {binc}, LL per dof = {llpdf}')
    # mean of a beta distribution with parameters (ainc, binc)
    log.info(f'mean: {ainc/(ainc+binc)}')
    log.info('covariance:\n%s', cov)

    sns.set()
    # dense x-grid padded half a bin on each side so the fitted curve spans the histogram
    xfvals = np.linspace(-0.5, maxgames + 0.5, 128)
    # normalized histogram of rookie games played (norm_hist makes it comparable to a pmf)
    plt_gp = sns.distplot(data_gp_rook,
                          bins=range(0, maxgames + 2),
                          kde=False,
                          norm_hist=True,
                          hist_kws={
                              'log': False,
                              'align': 'left'
                          })
    # overlay the fitted beta-binomial with rookie parameters (ark, brk) as a dashed curve
    plt.plot(xfvals,
             dist_fit.beta_binomial(xfvals, maxgames, ark, brk),
             '--',
             lw=2,
             color='blue')
    # plt.title('rookies')
    plt_gp.figure.savefig('{}_{}_rookie.png'.format(gp_stat, pos))
    plt_gp.figure.show()

    # same normalized histogram for the full (all-player) sample
    plt_gp = sns.distplot(data_gp,
                          bins=range(0, maxgames + 2),
                          kde=False,
                          norm_hist=True,
                          hist_kws={
                              'log': False,
                              'align': 'left'
                          })
Beispiel #6
0
    # NOTE(review): fragment of a larger routine — posdf, gp_stat, pos,
    # maxgames and data_gp_rook are bound earlier, outside this view.
    data_gp = posdf[gp_stat]
    
    
    # fit a beta-binomial (n = maxgames) to rookie and to all-player counts
    _,(ark,brk),cov,llpdf = dist_fit.to_beta_binomial( maxgames, data_gp_rook )
    log.info('rookie: alpha = {}, beta = {}, LL per dof = {}'.format(ark, brk, llpdf))
    log.info('covariance:\n' + str(cov))
    _,(ainc,binc),cov,llpdf = dist_fit.to_beta_binomial( maxgames, data_gp )
    log.info('all: alpha = {}, beta = {}, LL per dof = {}'.format(ainc, binc, llpdf))
    log.info('covariance:\n' + str(cov))
    
    sns.set()
    # dense x-grid padded half a bin on each side so the fitted curve spans the histogram
    xfvals = np.linspace(-0.5, maxgames+0.5, 128)
    plt_gp = sns.distplot(data_gp, bins=range(0,maxgames+2),
                          kde=False, norm_hist=True,
                          hist_kws={'log':False, 'align':'left'})
    # NOTE(review): this plots data_gp (all players) but overlays the *rookie*
    # parameters (ark, brk) and titles it 'rookies' — confirm whether
    # data_gp_rook / (ainc, binc) was intended here.
    plt.plot(xfvals, dist_fit.beta_binomial(xfvals, maxgames, ark, brk), '--', lw=2, color='violet')
    plt.title('rookies')
    plt_gp.figure.savefig('{}_{}.png'.format(gp_stat, pos))
    plt_gp.figure.show()
    # The bayesian update rule is:
    # alpha -> alpha + (games_played)
    # beta -> beta + (n - games_played)
    # we can just start on the default rookie values

    # for QBs we might want to adjust for year-in-league, or just filter on those which started many games

    # log.info('using rookie a,b = {},{}'.format(ark,brk))
    # alpha0,beta0 = ark,brk
    # log.info('using inclusive a,b = {}'.format(ainc, binc)) # does a bit worse
    # alpha0,beta0 = ainc,binc
    # m1 = data_gp_rook.mean()