Beispiel #1
0
def LastLeadChangeBias(sport):
    """Scatter-plot the last-lead-change frequencies for three groups of games.

    The games returned by ``lt.Lead`` are split into three groups by
    ``getTertiles`` using the two cut-offs from ``calcTertiles``.  For each
    group, ``getLastLeadFreq`` supplies the values that are plotted against
    all but the last of the returned bin edges.

    Args:
        sport: League code.  ``'NBA'`` selects a 2880 x-range and NBA bins,
            ``'NHL'`` a 3600 x-range and NHL bins; any other value gets a
            3600 x-range and NFL bins.
    """
    if sport == 'NBA':
        scope, bins = 2880, arcs.NBAbins()
    elif sport == 'NHL':
        scope, bins = 3600, arcs.NHLbins()
    else:
        scope, bins = 3600, arcs.NFLbins()

    lead = lt.Lead(lt.getData(sport), sport)
    top1, top2, maxLead = calcTertiles(lead)
    lead1, lead2, lead3 = getTertiles(lead, top1, top2)
    freqs = [getLastLeadFreq(group, bins) for group in (lead1, lead2, lead3)]

    # (edge colour, marker, legend text) for each group, in plotting order.
    prefix = sport + ' data, final lead diff: '
    styles = (
        ('red', 's', prefix + '0-' + str(int(top1))),
        ('blue', 'o', prefix + str(int(top1)) + '-' + str(int(top2))),
        ('green', 'D', prefix + str(int(top2)) + '-' + str(int(maxLead))),
    )
    for (heights, edges), (colour, shape, text) in zip(freqs, styles):
        # The final edge is dropped before plotting.
        plt.scatter(edges[:-1],
                    heights,
                    facecolors='none',
                    edgecolors=colour,
                    marker=shape,
                    label=text)

    plt.xlim(0, scope)
    plt.ylim(0, 0.0025)
    plt.xlabel('Clock time, seconds')
    plt.ylabel('Probability of last lead change')
    plt.legend()
    plt.show()
Beispiel #2
0
def NumLeadChanges(sport):
    """Plot the relative frequency of the number of lead changes.

    Three curves are drawn on the current axes:

    * the ``getNumFreq`` result for the ``lt`` data (label ``sport + ' data'``),
    * the ``getNumFreq`` result for ``rw.Lead(sport)`` ('Poisson process'),
    * the closed form ``sqrt(2 / (pi * N)) * exp(-m**2 / (2 * N))``
      ('Eq.(3)'), with ``N`` taken from ``rw.getScope(sport)``.

    Args:
        sport: League code forwarded to the ``lt`` and ``rw`` helpers.
    """
    _, N = rw.getScope(sport)

    observed = lt.Lead(lt.getData(sport), sport)
    obs_freq, obs_edges = getNumFreq(observed)
    maxc = obs_edges[-1]

    sim_freq, sim_edges = getNumFreq(rw.Lead(sport))

    fontSize = 18
    plt.gca().tick_params(labelsize=fontSize)

    # NOTE(review): the edge arrays are sliced by the *value* of their last
    # element, which presumably equals len(edges) - 1 -- confirm against
    # getNumFreq.
    line_kw = {'lw': 2, 'ms': 8}
    plt.plot(obs_edges[:obs_edges[-1]], obs_freq, 'r^-',
             label=sport + ' data', **line_kw)
    plt.plot(sim_edges[:sim_edges[-1]], sim_freq, 'bo-',
             label='Poisson process', **line_kw)

    # Closed-form curve sampled every 0.1 up to the largest observed edge.
    x = np.arange(0, maxc + 0.1, 0.1)
    prefactor = (2 / (np.pi * N))**0.5
    y = prefactor * np.exp(-(x**2) / (2 * N))
    plt.plot(x, y, c='black', linewidth=2, label='Eq.(3)')

    plt.xlim(0, maxc)
    plt.ylim(0, 0.5)
    plt.xlabel('Number of lead changes', fontsize=fontSize)
    plt.ylabel('Relative frequency', fontsize=fontSize)
    plt.legend(prop={'size': fontSize})
    plt.show()
Beispiel #3
0
def plotMaxLeadSmoothed(sport, step):
    """Scatter-plot the binned average of the maximum lead against its time.

    ``lt.maxLeadTime`` returns one value ``m`` and one time ``t`` per game.
    The values are first averaged per distinct time, then the distinct times
    are grouped into bins of width ``step`` and averaged again.  Each bin is
    plotted at its left edge.

    Args:
        sport: League code forwarded to ``lt.getData`` / ``lt.Lead``.
        step: Width of the smoothing bins, in the same units as the times
            returned by ``lt.maxLeadTime``.
    """
    data = lt.getData(sport)
    lead = lt.Lead(data, sport)
    scope = len(lead[0])  # x-axis upper limit: length of one game's series
    m, t = lt.maxLeadTime(lead)

    # Average the values of all games that share the same time.
    per_time = pd.DataFrame(m, index=t)
    avgm = per_time.groupby(per_time.index).mean()

    # Coarsen: assign every distinct time to a bin of width ``step``.
    bins = np.arange(min(avgm.index), max(avgm.index) + 1, step)
    groups = np.digitize(avgm.index.values.astype(int), bins)

    # dropna() returns a new frame; the original code discarded that result,
    # so NaN rows were never actually removed.  Keep the filtered frame.
    groupAv = avgm.groupby(groups).mean().dropna()

    # np.digitize labels are 1-based, so label i maps to left edge bins[i - 1].
    x = [bins[i - 1] for i in groupAv.index]

    plt.scatter(x, groupAv)
    plt.xlim(xmin=0, xmax=scope)
    plt.ylim(ymin=0)
    plt.xlabel('Elapsed time, t')
    plt.ylabel('Maximum lead in a game')
    plt.show()
Beispiel #4
0
def NumLeadChangesBias(sport):
    """Scatter-plot lead-change-count frequencies for three groups of games.

    The games are split into three groups by ``getTertiles`` using the
    cut-offs from ``calcTertiles``; ``getNumFreq`` is evaluated on each group
    and the three results are drawn as separate scatter series.

    NOTE(review): the x-axis upper limit is taken from the first group's
    edges only; points of the other two groups beyond it would be clipped.

    Args:
        sport: League code forwarded to ``lt.getData`` / ``lt.Lead``.
    """
    lead = lt.Lead(lt.getData(sport), sport)
    top1, top2, maxLead = calcTertiles(lead)
    lead1, lead2, lead3 = getTertiles(lead, top1, top2)
    freqs = [getNumFreq(group) for group in (lead1, lead2, lead3)]

    _, first_edges = freqs[0]
    maxc = first_edges[-1]

    # (edge colour, marker, legend text) for each group, in plotting order.
    prefix = sport + ' data, final lead diff: '
    styles = (
        ('red', 's', prefix + '0-' + str(int(top1))),
        ('blue', 'o', prefix + str(int(top1)) + '-' + str(int(top2))),
        ('green', 'D', prefix + str(int(top2)) + '-' + str(int(maxLead))),
    )
    for (heights, edges), (colour, shape, text) in zip(freqs, styles):
        # The edges are sliced by the value of their last element.
        plt.scatter(edges[:edges[-1]],
                    heights,
                    facecolors='none',
                    edgecolors=colour,
                    marker=shape,
                    label=text)

    plt.xlim(0, maxc)
    plt.ylim(ymin=0)
    plt.xlabel('Number of lead changes')
    plt.ylabel('Relative frequency')
    plt.legend()
    plt.show()
Beispiel #5
0
def ProbLeadSafe(sport):
    """Plot the probability that an effective lead is safe.

    ``getSafeAvg`` is evaluated twice: on the ``lt`` data (drawn as red
    square markers) and on ``sm.Lead(sport)`` (drawn as a black line labelled
    'theory (unbiased rw)').  Both axes are clamped: x to [0, 2], y to [0, 1].

    Args:
        sport: League code forwarded to the ``lt``, ``sm`` and ``getSafeAvg``
            helpers.
    """
    observed = lt.Lead(lt.getData(sport), sport)
    z, ave_q = getSafeAvg(observed, sport)

    simulated = sm.Lead(sport)
    sm_z, sm_q = getSafeAvg(simulated, sport)

    marker_style = {'color': 'red', 'marker': 's', 'label': sport + ' data'}
    line_style = {
        'color': 'black',
        'linewidth': 2.5,
        'label': 'theory (unbiased rw)',
    }
    plt.scatter(z, ave_q, **marker_style)
    plt.plot(sm_z, sm_q, **line_style)
    plt.legend()

    plt.xlim(0, 2)
    plt.ylim(0, 1)
    plt.xlabel('Effective lead, z')
    plt.ylabel('Probability that effective lead is safe')
    plt.show()
Beispiel #6
0
def plotWScoringRate(sport,
                     results_dir='/Users/Ish/Documents/SafeLeads/Results/'):
    """Plot the scoring-event probability above the last-lead-change histogram.

    The figure has two stacked axes sharing the x-axis:

    * top: moving average (window 10) of column 1 of
      ``<results_dir>/<sport>_res/<sport>_eventProb.csv`` plus a horizontal
      line at its mean;
    * bottom: normalised histogram of ``lt.lastChange`` for the ``lt`` data
      (scatter), the same for the ``sm`` simulation (line), and the curve
      ``1 / (pi * sqrt(x * (scope + 1 - x)))`` labelled 'Arcsine law'.

    Args:
        sport: League code.  ``'NBA'`` selects a 2880 x-range and NBA bins,
            ``'NHL'`` a 3600 x-range and NHL bins; any other value (e.g.
            ``'NFL'``, ``'CFB'``) gets a 3600 x-range and NFL bins.
        results_dir: Directory holding the per-sport ``<sport>_res`` folders.
            Defaults to the location that used to be hard-coded.
    """
    import os  # local import: only needed to assemble the CSV path

    if sport == 'NBA':
        scope = 2880
        bins = arcs.NBAbins()
    else:
        scope = 3600
        if sport == 'NHL':
            bins = arcs.NHLbins()
        else:
            # Previously only 'CFB' and 'NFL' were handled here and any other
            # value left ``bins`` unbound (UnboundLocalError further down).
            bins = arcs.NFLbins()
    binw = arcs.getBinWidth(bins)

    # Last-lead-change values for the recorded and the simulated games.
    lead = lt.Lead(lt.getData(sport), sport)
    s = lt.lastChange(lt.inLead(lead))
    sr = sm.lastChange(sm.inLead(sm.Lead(sport)))

    # DataFrame.from_csv no longer exists in pandas >= 1.0.  read_csv with
    # index_col=0 keeps the first column as index, so the probabilities are
    # still addressed as column 1.
    # NOTE(review): from_csv also attempted to parse the index as dates; that
    # is not reproduced here -- confirm movingaverage() ignores the index.
    csv_path = os.path.join(results_dir, sport + '_res',
                            sport + '_eventProb.csv')
    ev_prob = pd.read_csv(csv_path, header=None, index_col=0)

    fontSize = 18
    f = plt.figure()
    gs = gridspec.GridSpec(2, 1, height_ratios=[2, 3])
    ax1 = plt.subplot(gs[0, :])
    ax2 = plt.subplot(gs[1:, :], sharex=ax1)

    # --- top panel: smoothed scoring-event probability ---------------------
    smoothEvProb = movingaverage(ev_prob[1], 10)
    avg = np.mean(smoothEvProb)
    ax1.plot(smoothEvProb, linewidth=1.3)
    ax1.hlines(avg, 0, scope, colors='red', linewidth=2)
    # Hide the lowest y tick label of the top panel.
    yticks = ax1.yaxis.get_major_ticks()
    yticks[0].label1.set_visible(False)
    ax1.set_ylim(ymin=0, ymax=max(smoothEvProb) + 0.005)
    ax1.set_ylabel('Pr(scoring event)', fontsize=fontSize, labelpad=25)
    ax1.tick_params(labelsize=fontSize)

    # --- bottom panel: last-lead-change distributions ----------------------
    ax2.tick_params(labelsize=fontSize)

    # Counts are divided by bin width and sample size; each bin is plotted at
    # its left edge.
    h, b = np.histogram(s, bins)
    hcorr = h / (binw * len(s))
    ax2.scatter(b[:-1], hcorr, c='blue', marker='o', label=sport + ' games')

    hr, br = np.histogram(sr, bins)
    hrcorr = hr / (binw * len(sr))
    ax2.plot(br[:-1],
             hrcorr,
             color='DarkTurquoise',
             linewidth=2,
             label='Inhomogeneous Poisson process')

    # Arcsine-law reference curve (evaluates to inf at x = 0).
    x = np.arange(scope)
    y = 1 / (np.pi * (x * (scope + 1 - x))**(0.5))
    ax2.plot(x, y, color='FireBrick', linewidth=2, label='Arcsine law')

    ax2.set_xlim(xmin=0, xmax=scope)
    ax2.set_ylim(ymin=0, ymax=0.0027)
    ax2.legend(prop={'size': fontSize})
    ax2.set_xlabel('Game clock time, t (seconds)', fontsize=fontSize)
    ax2.set_ylabel('Pr(last lead change)', fontsize=fontSize)

    # Bring the panels together and hide x tick labels on all but the bottom.
    f.subplots_adjust(left=0.16,
                      right=0.95,
                      top=0.95,
                      bottom=0.11,
                      hspace=0.00001)
    plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
    plt.show()
Beispiel #7
0
def _flippedNegativeCounts(inLead):
    """Count negative entries per row after a random per-row sign flip.

    Each row of the 2-D ``inLead`` is multiplied by +1 or -1 (equal
    probability, one draw per row from ``np.random.rand``) and the number of
    strictly negative entries in each row is returned.
    """
    inLead = np.asarray(inLead)
    signs = np.where(np.random.rand(len(inLead)) < 0.5, 1, -1)
    return np.sum(signs[:, np.newaxis] * inLead < 0, axis=1)


def PropInLead(sport):
    """Plot the distribution of the time a randomly chosen team leads.

    For the ``lt`` data and for the ``sm`` simulation, the sign of every
    game's ``inLead`` row is flipped at random and the negative entries are
    counted.  Presumably the sign marks which team leads at each second, so
    the count is the time one randomly picked side spends in the lead --
    TODO confirm against ``lt.inLead``.  The counts are histogrammed and
    compared with the curve ``1 / (pi * sqrt(x * (scope + 1 - x)))``
    labelled 'Arcsine law'.

    The former ``rw`` (homogeneous Poisson) simulation is no longer run: its
    result was only consumed by plotting code that had been commented out.

    Args:
        sport: League code.  ``'NBA'`` selects a 2880 x-range and NBA bins,
            ``'NHL'`` a 3600 x-range and NHL bins; any other value gets a
            3600 x-range and NFL bins.
    """
    lead = lt.Lead(lt.getData(sport), sport)
    props = _flippedNegativeCounts(lt.inLead(lead))
    sm_props = _flippedNegativeCounts(sm.inLead(sm.Lead(sport)))

    if sport == 'NBA':
        scope = 2880
        bins = arcs.NBAbins()
    else:
        scope = 3600
        if sport == 'NHL':
            bins = arcs.NHLbins()
        else:
            bins = arcs.NFLbins()
    binw = arcs.getBinWidth(bins)

    fontSize = 18
    plt.gca().tick_params(labelsize=fontSize)

    # Counts are divided by bin width and sample size; each bin is plotted at
    # its left edge.
    h, b = np.histogram(props, bins)
    hcorr = h / (binw * len(props))
    plt.scatter(b[:-1], hcorr, c='blue', marker='o', label=sport + ' games')

    hr, br = np.histogram(sm_props, bins)
    hrcorr = hr / (binw * len(sm_props))
    plt.plot(br[:-1],
             hrcorr,
             color='DarkTurquoise',
             linewidth=2,
             label='Inhomogeneous Poisson process')

    # Arcsine-law reference curve (evaluates to inf at x = 0).
    x = np.arange(scope)
    y = 1 / (np.pi * (x * (scope + 1 - x))**(0.5))
    plt.plot(x, y, color='FireBrick', linewidth=2, label='Arcsine law')

    plt.xlim(xmin=0, xmax=scope)
    plt.ylim(ymin=0, ymax=0.0027)
    plt.legend(prop={'size': fontSize})
    plt.xlabel('Number of seconds a team is in the lead', fontsize=fontSize)
    plt.ylabel('Relative frequency', fontsize=fontSize)
    plt.subplots_adjust(left=0.16, right=0.95, top=0.95, bottom=0.13)
    plt.show()
def plotBathtub(sport):
    """Plot normalised timing histograms against the arcsine-law curve.

    Drawn on the current axes:

    * scatter: histogram of ``lt.lastChange`` for the ``lt`` data;
    * line: histogram of the times returned by ``lt.maxLeadTime`` for the
      ``sm`` simulation;
    * line: ``1 / (pi * sqrt(x * (scope + 1 - x)))`` labelled 'Arcsine law'.

    NOTE(review): the data series uses last-lead-change times while the
    simulated series uses maximum-lead times, and the y label mixes both
    terms.  The original code computed ``sm.lastChange`` for the simulation
    and then immediately overwrote it with the ``lt.maxLeadTime`` result; the
    overwritten computation has been dropped and the plotted quantities are
    unchanged.  Confirm which quantity is intended.

    Args:
        sport: League code.  ``'NBA'`` selects a 2880 x-range and NBA bins,
            ``'NHL'`` a 3600 x-range and NHL bins; any other value (e.g.
            ``'NFL'``, ``'CFB'``) gets a 3600 x-range and NFL bins.
    """
    if sport == 'NBA':
        scope = 2880
        bins = NBAbins()
    else:
        scope = 3600
        if sport == 'NHL':
            bins = NHLbins()
        else:
            # Previously only 'CFB' and 'NFL' were handled here and any other
            # value left ``bins`` unbound (UnboundLocalError further down).
            bins = NFLbins()
    binw = getBinWidth(bins)

    lead = lt.Lead(lt.getData(sport), sport)
    s = lt.lastChange(lt.inLead(lead))

    # Only the times (second return value) of the simulated games are used.
    sm_lead = sm.Lead(sport)
    _, sr = lt.maxLeadTime(sm_lead)

    fontSize = 18
    plt.gca().tick_params(labelsize=fontSize)

    # Counts are divided by bin width and sample size; each bin is plotted at
    # its left edge.
    h, b = np.histogram(s, bins)
    hcorr = h / (binw * len(s))
    plt.scatter(b[:-1], hcorr, c='blue', marker='o', label=sport + ' games')

    hr, br = np.histogram(sr, bins)
    hrcorr = hr / (binw * len(sr))
    plt.plot(br[:-1],
             hrcorr,
             color='DarkTurquoise',
             linewidth=2,
             label='Inhomogeneous Poisson process')

    # Arcsine-law reference curve (evaluates to inf at x = 0).
    x = np.arange(scope)
    y = 1 / (np.pi * (x * (scope + 1 - x))**(0.5))
    plt.plot(x, y, color='FireBrick', linewidth=2, label='Arcsine law')

    plt.xlim(xmin=0, xmax=scope)
    plt.ylim(ymin=0, ymax=0.0027)
    plt.legend(prop={'size': fontSize})
    plt.xlabel('Game clock time, t (seconds)', fontsize=fontSize)
    plt.ylabel('Probability of maximum lead change', fontsize=fontSize)
    plt.subplots_adjust(left=0.16, right=0.95, top=0.95, bottom=0.13)
    plt.show()