def predict_proba(self, X_home, X_away, n_max=20):
        """
        Predict match outcome probabilities.

        Parameters
        ----------
        X_home:
            2d array-like, shape (n_samples, n_features). Input information to predict estimated goals for home team.
        X_away:
            2d array-like, shape (n_samples, n_features). Input information to predict estimated goals for away team.
        n_max:
            int, non-negative. Maximum number of goals considered for a team per match.
            
        Returns
        -------
        p_matrix:
            2d array-like, shape (n_samples, 3). Matrix of estimated probabilities. Each row contains the probabilities of the three possible outcomes (home win, draw, home loss) for one match.
        """
        
        hgoal_lambda, agoal_lambda = self._lambda(X_home, X_away)

        p_win = np.sum(skellam.pmf(np.arange(n_max)+1, hgoal_lambda.reshape(-1,1), agoal_lambda.reshape(-1,1)), axis=1)
        p_draw = np.sum(skellam.pmf(0, hgoal_lambda.reshape(-1,1), agoal_lambda.reshape(-1,1)), axis=1)
        p_lose = np.sum(skellam.pmf(np.arange(n_max)-n_max, hgoal_lambda.reshape(-1,1), agoal_lambda.reshape(-1,1)), axis=1)
    
        p_matrix = np.array([p_win,p_draw, p_lose]).transpose()
        
        return p_matrix
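# Sanity check (not part of the class above): the same three outcome probabilities can be
# obtained directly from the Skellam distribution for a single pair of goal rates. The rates
# below are purely hypothetical, illustrative values.
import numpy as np
from scipy.stats import skellam

home_rate, away_rate = 1.6, 1.1   # hypothetical Poisson goal rates for one match
n_max = 20

p_win = skellam.pmf(np.arange(1, n_max + 1), home_rate, away_rate).sum()   # P(diff >= 1)
p_draw = skellam.pmf(0, home_rate, away_rate)                              # P(diff == 0)
p_lose = skellam.pmf(np.arange(-n_max, 0), home_rate, away_rate).sum()     # P(diff <= -1)

print(p_win, p_draw, p_lose, p_win + p_draw + p_lose)   # last value should be ~1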
Example #2
 def calculateStrategicUtilities(self, passedCandidates, passedElectors, MIN_UTIL, iteration):
     electorID = self.ID
     nCandidates = len(passedCandidates)
     self.allVotes = GlobalFuncs.countVoteIntentions(passedElectors,        \
                                                 passedCandidates,iteration)
     self.chosenCandidate = self.chooseCandidate(passedCandidates, iteration)
     # work with a copy so that removing this elector's own vote below does not also change self.allVotes
     self.othersVotes = self.allVotes.copy()
     self.othersVotes[self.chosenCandidate.ID] =                            \
                                self.othersVotes[self.chosenCandidate.ID] - 1
     for rowIndex in range(0,nCandidates):
         for colIndex in range(0,nCandidates):
             if rowIndex == colIndex:
                 self.tieProbs[rowIndex,colIndex] = 1
                 self.pivotalityProbs[rowIndex,colIndex] = 1
                 self.winnerProbs[rowIndex,colIndex] = 1
             else:
                 skellamA = self.othersVotes[rowIndex]
                 skellamB = self.othersVotes[colIndex]
                 if skellamA == 0:
                     skellamA = 10**-100
                 if skellamB == 0:
                     skellamB = 10**-100
                 self.tieProbs[rowIndex,colIndex] = skellam.pmf(0,skellamA,skellamB)
                 self.pivotalityProbs[rowIndex,colIndex] = skellam.pmf(-1,skellamA,skellamB)
                 self.winnerProbs[rowIndex,colIndex] = 1 - skellam.cdf(-1,skellamA,skellamB)
     #UNCOMMENT ONLY IN CASE OF PROBLEMS WITH 0 ENTRIES###############
     #for rowIndex in range(0,nCandidates):
     #    for colIndex in range(0,nCandidates):
     #        if math.isnan(self.tieProbs[rowIndex,colIndex]):
     #            self.tieProbs[rowIndex,colIndex] = 0
     #        if math.isnan(self.pivotalityProbs[rowIndex,colIndex]):
     #            self.pivotalityProbs[rowIndex,colIndex] = 0
     #        if math.isnan(self.winnerProbs[rowIndex,colIndex]):
     #            self.winnerProbs[rowIndex,colIndex] = 0
     #################################################################
     for rowIndex in range(0,nCandidates):
         for colIndex in range(0,nCandidates):
             if rowIndex != colIndex:
                 probsWoutPair = np.delete(self.winnerProbs,rowIndex,0)
                 probsWOutPair = np.delete(probsWoutPair,colIndex,1)
                 probsProd = np.prod(probsWOutPair)
                 otherPivsSum = self.pivotalityProbs[rowIndex,colIndex] + self.winnerProbs[rowIndex,colIndex]
                 self.pivotalities[rowIndex,colIndex] = probsProd * otherPivsSum
     if iteration == 0:
         self.previousUtilities = self.sincereUtilities
     else:
         self.previousUtilities = self.strategicUtilities
     for cand in range(0,nCandidates):
         for otherCand in range(0,nCandidates):
             utilityDiff = self.previousUtilities[cand] - self.sincereUtilities[otherCand]
             self.newUtilDiff[otherCand] = utilityDiff * self.pivotalities[cand,otherCand]
         self.newUtilitySum[cand] = np.sum(self.newUtilDiff)
     self.newUtilitySum[np.argmin(self.sincereUtilities)] = MIN_UTIL
     self.strategicUtilities = self.newUtilitySum
     return self.strategicUtilities
    def checkDiffInGoals(self, data=None):
        data = self._genData(data)

        skellam_pred = [
            skellam.pmf(i,
                        data.mean()[0],
                        data.mean()[1]) for i in range(-6, 8)
        ]

        plt.hist(data[['HomeGoals']].values - data[['AwayGoals']].values,
                 range(-6, 8),
                 alpha=0.7,
                 label='Actual',
                 density=True)
        plt.plot([i + 0.5 for i in range(-6, 8)],
                 skellam_pred,
                 linestyle='-',
                 marker='o',
                 label="Skellam",
                 color='#CD5C5C')
        plt.legend(loc='upper right', fontsize=13)
        plt.xticks([i + 0.5 for i in range(-6, 8)], [i for i in range(-6, 8)])
        plt.xlabel("Home Goals - Away Goals", size=13)
        plt.ylabel("Proportion of Matches", size=13)
        plt.title("Difference in Goals Scored (Home Team vs Away Team)",
                  size=14,
                  fontweight='bold')
        plt.ylim([-0.004, 0.26])
        plt.tight_layout()
        plt.show()
Example #4
def overtime(mu1, mu2, min, up, outcome):

# Sloppy

    if (min < 90):
        aup = 0
        amin = 90
    else:
        aup = up
        amin = min
    
    if (amin <= 105): # 1st extra time
        time_r = (120.0-amin)+stoppage_1ot+stoppage_2ot
    elif (amin <= 120): # 2nd extra time
        time_r = (120.0-amin)+stoppage_2ot

    ft = time_r/(30.0+stoppage_1ot+stoppage_2ot)

    if (outcome=="draw"):
        p = skellam.pmf(-aup, ft*mu1*ot_ft, ft*mu2*ot_ft)
    elif (outcome == "lose"):
        p = skellam.cdf(-1-aup, ft*mu1*ot_ft, ft*mu2*ot_ft)
    else:
        p = skellam.cdf(-1+aup, ft*mu2*ot_ft, ft*mu1*ot_ft)

    return(p)
Ejemplo n.º 5
0
def outcome(mu1, mu2, min, up, outcome):

# Sloppy

    if (min > 90):
        if (outcome=="draw"):
            p = 1.0
        else:
            p = 0.0
        return(p)

    if (min <= 45): # 1st half
        time_r = (90.0-min)+stoppage_1reg+stoppage_2reg
    elif (min <= 90): # 2nd half
        time_r = (90.0-min)+stoppage_2reg
        
    ft = time_r/(90.0+stoppage_1reg+stoppage_2reg)

    if (outcome=="draw"):
        p = skellam.pmf(-up, mu1*ft, mu2*ft)
    elif (outcome == "lose"):
        p = skellam.cdf(-1-up, mu1*ft, mu2*ft)
    else:
        p = skellam.cdf(-1+up, mu2*ft, mu1*ft)

    return(p)
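# Illustrative call of outcome() above (a rough sketch, not from the original file): the snippet
# relies on module-level stoppage-time constants that are not defined here, so hypothetical
# values are supplied. mu1/mu2 are made-up full-match goal rates; min=60, up=1 means the
# home side leads by one goal after 60 minutes.
stoppage_1reg, stoppage_2reg = 2.0, 4.0   # assumed first-half / second-half stoppage minutes
p_draw_from_here = outcome(mu1=1.4, mu2=1.1, min=60, up=1, outcome="draw")
print(p_draw_from_here)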
Example #6
def expected_result(elo_a, elo_b, winning_margin):
    """
    https://en.wikipedia.org/wiki/Elo_rating_system#Mathematical_details
    """
    px = skellam.cdf(winning_margin, elo_a, elo_b)
    pwm = skellam.pmf(winning_margin, elo_a, elo_b)
    expect_a = (px + pwm * 0.5) - 0.3
    return expect_a
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.cov_struct import Independence
from statsmodels.genmod.families import Poisson


def BuildPoissonModels(hist_data, feature_list, comp_data=None):
    ''' Build score predictions via (linear) Poisson regression. '''
    hist_data_1 = hist_data[["team_1_score"] + feature_list]
    hist_data_2 = hist_data[["team_2_score"] + feature_list]

    formula_1 = "team_1_score ~ " + " + ".join(feature_list)
    formula_2 = "team_2_score ~ " + " + ".join(feature_list)

    # using the GEE package along with independence assumptions to fit the Poisson model.
    # I'm assuming this uses a maximum likelihood approach?
    fam = Poisson()
    ind = Independence()

    model_1 = GEE.from_formula(formula_1,
                               "team_1_score",
                               hist_data,
                               cov_struct=ind,
                               family=fam)
    model_2 = GEE.from_formula(formula_2,
                               "team_2_score",
                               hist_data,
                               cov_struct=ind,
                               family=fam)

    model_1_fit = model_1.fit()
    model_2_fit = model_2.fit()
    print(model_1_fit.summary())

    hist_data['team_1_score_pred'] = model_1_fit.predict(hist_data)
    hist_data['team_2_score_pred'] = model_2_fit.predict(hist_data)

    # return historical data if comp_data wasn't passed.
    if comp_data is None:
        return hist_data

    # prepare comp data
    comp_data['team_1_score_pred'] = model_1_fit.predict(
        comp_data[feature_list])
    comp_data['team_2_score_pred'] = model_2_fit.predict(
        comp_data[feature_list])

    comp_data['team_1_prob'] = comp_data[[
        'team_1_score_pred', 'team_2_score_pred'
    ]].apply(
        lambda x: 1 - skellam.cdf(0, x['team_1_score_pred'], x[
            'team_2_score_pred']), 1)
    comp_data['team_tie_prob'] = comp_data[[
        'team_1_score_pred', 'team_2_score_pred'
    ]].apply(
        lambda x: skellam.pmf(0, x['team_1_score_pred'], x['team_2_score_pred']
                              ), 1)
    comp_data['team_2_prob'] = comp_data[[
        'team_1_score_pred', 'team_2_score_pred'
    ]].apply(
        lambda x: skellam.cdf(-1, x['team_1_score_pred'], x['team_2_score_pred'
                                                            ]), 1)

    return hist_data, comp_data
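# Side note on the modelling choice above (a minimal sketch, not part of the original file):
# with an Independence covariance structure, the GEE point estimates coincide with those of an
# ordinary maximum-likelihood Poisson GLM, so an equivalent fit can be obtained with smf.glm.
# The tiny DataFrame and the 'rank_diff' feature below are made up for illustration only.
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

toy = pd.DataFrame({
    "team_1_score": [2, 1, 0, 3, 1, 2],
    "rank_diff":    [0.5, -1.0, -2.0, 1.5, 0.0, 0.8],
})
glm_fit = smf.glm("team_1_score ~ rank_diff", data=toy,
                  family=sm.families.Poisson()).fit()
print(glm_fit.params)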
Example #8
def oddspredict2(fixtures, att_params, def_params, hmean, amean):
	resultodds = []
	neutralscore = (hmean+amean)/2
	for j in range(len(fixtures)):
		lamda = neutralscore * att_params[fixtures[j,0]] * def_params[fixtures[j,1]]
		mu = neutralscore * att_params[fixtures[j,1]] * def_params[fixtures[j,0]]
		px = skellam.cdf(-1, lamda, mu)
		p0 = skellam.pmf(0, lamda, mu)
		resultodds.append(px+p0*0.5)
	return resultodds
Example #9
def calculateProb(home_id, away_id, params, num_teams):
    ''' Function to calculate the outcome probabilities between two teams
	@param home_id: Home team id
	@param away_id: Away team id
	@param params: Array of parameters, result of the minimization problem
	@param num_teams: Number of teams in the league
	'''
    mu = params[0]
    h = params[1]
    lambda_one = np.exp(mu + h + params[1 + home_id] +
                        params[num_teams + 1 + away_id])
    lambda_two = np.exp(mu + params[1 + away_id] +
                        params[num_teams + 1 + home_id])
    home_loss = 0
    draw = 0
    home_win = 0
    for z in range(1, 20):
        home_loss += skellam.pmf(-1 * z, lambda_one, lambda_two)
        home_win += skellam.pmf(z, lambda_one, lambda_two)
    draw = skellam.pmf(0, lambda_one, lambda_two)
    return np.array((home_win, draw, home_loss))
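# Illustrative call of calculateProb() above (hypothetical, assuming numpy and scipy.stats.skellam
# are imported as in the rest of this file). The indexing implies a parameter vector laid out as
# [mu, h, attack terms, defence terms] with team ids starting at 1; the 4-team toy below uses
# all-zero (league-average) teams and a made-up home-advantage term.
num_teams = 4
params = np.zeros(2 + 2 * num_teams)
params[1] = 0.3   # hypothetical home advantage h
print(calculateProb(home_id=1, away_id=2, params=params, num_teams=num_teams))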
Example #10
def sim_season(dataframe, iterations):
    # Team names as keys and points (initially 0) as values
    d = dict.fromkeys(dataframe['Home'].unique().tolist(), 0)

    for _ in range(iterations):
        for _, row in dataframe.iterrows():
            h_xg = row['Home xG']
            a_xg = row['Away xG']

            # Calculate prob of home winning by 1 to 9 goals
            h_win = sum([skellam.pmf(sup, h_xg, a_xg) for sup in range(1, 10)])

            # Calculate prob of away winning by 1 to 9 goals
            a_win = sum(
                [skellam.pmf(-sup, h_xg, a_xg) for sup in range(1, 10)])

            # Supremacy of 0 is a draw
            draw = skellam.pmf(0, h_xg, a_xg)

            # Calculate match outcome
            result = random.choices(('Home', 'Draw', 'Away'),
                                    weights=[h_win, draw, a_win])[0]

            # Add 3 points for win, 1 for draw
            if result == 'Home':
                d[row.Home] += 3
            elif result == 'Away':
                d[row.Away] += 3
            else:
                d[row.Home] += 1
                d[row.Away] += 1

    # Update dict with average points rather than total
    d.update((team, pts / iterations) for team, pts in d.items())

    # Create a list containing (team, points) tuples sorted by points
    points_sorted = sorted(d.items(), key=lambda x: x[1], reverse=True)

    return points_sorted
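# Illustrative use of sim_season() above (a toy sketch, assuming the function's own imports —
# random and scipy.stats.skellam — are available). The two-fixture schedule and xG numbers are
# made up; column names follow the ones the function expects.
import pandas as pd

fixtures = pd.DataFrame({
    "Home":    ["Alpha FC", "Beta FC"],
    "Away":    ["Beta FC", "Alpha FC"],
    "Home xG": [1.8, 1.2],
    "Away xG": [0.9, 1.4],
})
print(sim_season(fixtures, iterations=500))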
Example #11
def calculateProb(z, home_team, away_team, team_dict, params):
    ''' Function to calculate the outcome probabilities between two teams
	@param z: Goal difference
	@param home_team: Home team string
	@param away_team: Away team string
	@param team_dict: Dictionary mapping team names to integers
	@param params: Array of parameters, result of the minimization problem
	'''
    home_id = team_dict[home_team]
    away_id = team_dict[away_team]
    mu = params[0]
    h = params[1]
    lambda_one = np.exp(mu + h + params[1 + home_id] + params[21 + away_id])
    lambda_two = np.exp(mu + params[1 + away_id] + params[21 + home_id])
    return skellam.pmf(z, lambda_one, lambda_two)
Example #12
 def logp(self, dist):
     dist = int(dist)
     max_dist = 100
     min_dist = -100
     if dist > max_dist: dist = max_dist
     if dist < min_dist: dist = min_dist
     if dist in self._distribution_memo:
         logp = self._distribution_memo[dist]
     else:
         p = skellam.pmf(dist, mu1=self.mu1, mu2=self.mu2)
         if p <= 1e-6:
             p = 1e-6
         logp = math.log(p)
         self._distribution_memo[dist] = logp
     return logp
Example #13
def oddspredict(fixtures, att_params, def_params, hmean, amean):
	resultodds = []
	neutralscore = (hmean+amean)/2
	for j in range(len(fixtures)):
		lamda = neutralscore * att_params[fixtures[j,0]] * def_params[fixtures[j,1]]
		mu = neutralscore * att_params[fixtures[j,1]] * def_params[fixtures[j,0]]
		p_hw, p_drw, p_aw = 0, 0, 0
		# accumulate the away-win probability, counting a draw as half a win
		for x in range(-75, 1):
			px = skellam.pmf(x, lamda, mu)
			if(x<0):
				p_aw = p_aw + px
			else:
				p_aw = p_aw + (px*0.5)
		resultodds.append(1-p_aw)
	return resultodds
Example #14
def likelihoodFn(params, data):
    ''' Function to specify the likelihood given a set of parameters and data
	@param params: Array of parameters to use
	@param data: Array of data to use
	'''
    mu = params[0]
    h = params[1]
    sum_lik = 0
    for r in range(0, data.shape[0]):
        row = data[r, ]
        home_id = row[0]
        away_id = row[1]
        z = row[2]
        lambda_one = np.exp(mu + h + params[1 + home_id] +
                            params[21 + away_id])
        lambda_two = np.exp(mu + params[1 + away_id] + params[21 + home_id])
        sum_lik -= np.log(skellam.pmf(z, lambda_one, lambda_two))
    return sum_lik
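# Illustrative fit using likelihoodFn() above (a rough sketch, assuming scipy.stats.skellam is
# imported as in the rest of this file). The indexing params[1 + id] / params[21 + id] implies
# team ids 1..20 and a parameter vector of length 42 (mu, home advantage h, 20 attack terms,
# 20 defence terms). The three fixtures below are made up and far too few for a real fit.
import numpy as np
from scipy.optimize import minimize

toy_data = np.array([[1, 2, 1],    # home_id, away_id, goal difference
                     [3, 4, 0],
                     [2, 3, -1]])
x0 = np.zeros(42)
res = minimize(likelihoodFn, x0, args=(toy_data,), method="Nelder-Mead",
               options={"maxiter": 2000})
print(res.x[:2])   # fitted mu and home-advantage h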
Example #15
df_future = df.loc[df['homeGoals'].isnull()]

df_past['matchDate'] = pd.to_datetime(df_past['matchDate'])
df_past['time_diff'] = (max(df_past['matchDate']) -
                        df_past['matchDate']).dt.days
df_past = df_past[[
    'homeTeam', 'homeGoals', 'awayTeam', 'awayGoals', 'time_diff'
]]
df_past.head()
# =============================================================================
# poisson regression model
# =============================================================================

# work out Skellam probabilities of the goal difference between home and away team, from -8 to +7
skellam_pred = [
    skellam.pmf(i, df_past['homeGoals'].mean(), df_past['awayGoals'].mean())
    for i in range(-8, 8)
]

# restructure dataframe by splitting home and away fixtures
goal_model_data = pd.concat([
    df_past[['homeTeam', 'awayTeam',
             'homeGoals']].assign(home=1).rename(columns={
                 'homeTeam': 'team',
                 'awayTeam': 'opponent',
                 'homeGoals': 'goals'
             }),
    df_past[['awayTeam', 'homeTeam',
             'awayGoals']].assign(home=0).rename(columns={
                 'awayTeam': 'team',
                 'homeTeam': 'opponent',
 def _probGoalsDiff(self, diff, data):
     goals_diff = diff
     return skellam.pmf(goals_diff, data.mean()[0], data.mean()[1])
Example #17
from scipy.stats import skellam
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()

mu1, mu2 = 0.03, 0.02
mean, var, skew, kurt = skellam.stats(mu1, mu2, moments='mvsk')

print(mean, var, skew, kurt)

x = np.arange(skellam.ppf(0.01, mu1, mu2),
              skellam.ppf(0.99, mu1, mu2))

ax.plot(x, skellam.pmf(x, mu1, mu2), 'bo', ms=8, label = 'skellam pmf')
ax.vlines(x, 0, skellam.pmf(x, mu1, mu2), colors='b', lw = 5, alpha=0.5)
plt.show()
from scipy.stats import skellam
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

mu1, mu2 = 15, 8
mean, var, skew, kurt = skellam.stats(mu1, mu2, moments='mvsk')

# Display the probability mass function (``pmf``):

x = np.arange(skellam.ppf(0.01, mu1, mu2), skellam.ppf(0.99, mu1, mu2))
ax.plot(x, skellam.pmf(x, mu1, mu2), 'bo', ms=8, label='skellam pmf')
ax.vlines(x, 0, skellam.pmf(x, mu1, mu2), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:

rv = skellam(mu1, mu2)
ax.vlines(x,
          0,
          rv.pmf(x),
          colors='k',
          linestyles='-',
          lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()
                  'weight': 'bold'
              })

plt.xticks([i - 0.5 for i in range(1, 9)], [i for i in range(9)])
plt.xlabel("Goals per Match", size=13)
plt.ylabel("Proportion of Matches", size=13)
plt.title("Number of Goals per Match (EPL Season 02/03 - 13/14 )",
          size=14,
          fontweight='bold')
plt.ylim([-0.004, 0.4])
plt.tight_layout()
plt.show()

skellam_pred = [
    skellam.pmf(i,
                Training_Set.mean()[0],
                Training_Set.mean()[1]) for i in range(-6, 9)
]
plt.hist(Training_Set[['FTHG']].values - Training_Set[['FTAG']].values,
         range(-6, 9),
         alpha=0.7,
         label='Actual',
         density=True)
plt.plot([i + 0.5 for i in range(-6, 9)],
         skellam_pred,
         linestyle='-',
         marker='o',
         label="Skellam",
         color='#CD5C5C')
plt.legend(loc='upper right', fontsize=13)
plt.xticks([i + 0.5 for i in range(-6, 9)], [i for i in range(-6, 9)])
Example #20
# | | | car rental 1
# |
# | car rental 2
rent_1 = 3
return_1 = 3
rent_2 = 4
returns_2 = 2
move_cost = 20
car_reward = 100
gamma = 0.9
M = 20
threshold = 12

# skellam distribution
rentals = [[skellam.pmf(x, 3, 3) for x in range(-20, 21, 1)], [skellam.pmf(x, 2, 4) for x in range(-20, 21, 1)]]

# poisson distribution
rents = [[poisson.pmf(x, mu=3) for x in range(21)], [poisson.pmf(x, mu=4) for x in range(21)]]
returns = [[poisson.pmf(x, mu=3) for x in range(21)], [poisson.pmf(x, mu=2) for x in range(21)]]

rents_cdf = [[poisson.cdf(x, mu=3) for x in range(21)], [poisson.cdf(x, mu=4) for x in range(21)]]
returns_cdf = [[poisson.cdf(x, mu=3) for x in range(21)], [poisson.cdf(x, mu=2) for x in range(21)]]


def get_particular_prob(i, i_poss, rent_num):
    if i_poss == M:
        partial_sum = 1.0
        for x in range(0, i + 1):
            partial_sum -= rents[rent_num][x] * (returns_cdf[rent_num][x + i_poss - i] - returns[rent_num][x + i_poss - i])
        partial_sum -= (1 - rents_cdf[rent_num][i]) * (returns_cdf[rent_num][i_poss])
Example #21
plt.ylabel("Proportion of Matches",size=13)
plt.title("Number of Goals per Match (Tippeligaen 2012 Season)",size=14,fontweight='bold')
plt.ylim([-0.004, 0.4])
plt.tight_layout()
plt.show()






# Share of home goals vs away goals (Note that we consider the number of goals scored by each team to be
# independent events (i.e. P(A n B) = P(A) P(B)). The difference of two Poisson distributions is actually
# called a Skellam distribution; a quick Monte Carlo check of this is sketched after the plot below.)

skellam_pred = [skellam.pmf(i,  norske_resultater.mean()[0],  norske_resultater.mean()[1]) for i in range(-6,8)]
plt.hist(norske_resultater[['HomeGoals']].values - norske_resultater[['AwayGoals']].values, range(-6,8),
         alpha=0.7, label='Actual',density=True)
plt.plot([i+0.5 for i in range(-6,8)], skellam_pred,
                  linestyle='-', marker='o',label="Skellam", color = '#CD5C5C')
plt.legend(loc='upper right', fontsize=13)
plt.xticks([i+0.5 for i in range(-6,8)],[i for i in range(-6,8)])
plt.xlabel("Home Goals - Away Goals",size=13)
plt.ylabel("Proportion of Matches",size=13)
plt.title("Difference in Goals Scored (Home Team vs Away Team)",size=14,fontweight='bold')
plt.ylim([-0.004, 0.26])
plt.tight_layout()
plt.show()
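# Quick Monte Carlo check of the statement above (a sketch with made-up rates, not the
# Tippeligaen means): the difference of two independent Poisson draws should follow a
# Skellam distribution.
import numpy as np
from scipy.stats import skellam

rng = np.random.default_rng(0)
mu_home, mu_away = 1.5, 1.0                      # illustrative Poisson rates
diffs = rng.poisson(mu_home, 100_000) - rng.poisson(mu_away, 100_000)
print(np.mean(diffs == 1), skellam.pmf(1, mu_home, mu_away))   # the two numbers should be close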


Example #22
################################################################################




Debug.Print("rescale factor is: " + str(G_RESCALE_FACTOR))

# Initialize mu_A and mu_B randomly 
mu_A = N_VOTERS_PREF_A = int(np.random.uniform(0, N_VOTERS))
mu_B = N_VOTERS_PREF_B = N_VOTERS - mu_A

Debug.Print("mu_A is: " + str(mu_A))
Debug.Print("mu_B is: " + str(mu_B))

#First two alphas are drawn from a skellam distribution with some upward rescale
alpha1 = skellam.pmf(0, mu_A, mu_B) #prob n1 == n2
alpha2 = skellam.pmf(-1, mu_A, mu_B)  #prob n1 == n2 - 1
alpha1 *= G_RESCALE_FACTOR
alpha2 *= G_RESCALE_FACTOR

Debug.Print("alpha1 is: " + str(alpha1))
Debug.Print("alpha2 is: " + str(alpha2) + "\n")

# Init empty lists of length N_VOTERS_PREF A and B
electorsA = [None] * N_VOTERS_PREF_A
electorsB = [None] * N_VOTERS_PREF_B



n_A = 0
for i in range(0, N_VOTERS_PREF_A):
Example #23
                  'size': '14',
                  'weight': 'bold'
              })

plt.xticks([i - 0.5 for i in range(1, 9)], [i for i in range(9)])
plt.xlabel("Goals per Match", size=13)
plt.ylabel("Proportion of Matches", size=13)
plt.title("Number of Goals per Match (EPL 2016/17 Season)",
          size=14,
          fontweight='bold')
plt.ylim([-0.004, 0.4])
plt.tight_layout()
plt.show()

# probability of draw between home and away team
skellam.pmf(0.0, epl_1617.mean()[0], epl_1617.mean()[1])

# probability of home team winning by one goal
skellam.pmf(1, epl_1617.mean()[0], epl_1617.mean()[1])

skellam_pred = [
    skellam.pmf(i,
                epl_1617.mean()[0],
                epl_1617.mean()[1]) for i in range(-6, 8)
]

plt.hist(epl_1617[['HomeGoals']].values - epl_1617[['AwayGoals']].values,
         range(-6, 8),
         alpha=0.7,
         label='Actual',
         density=True)
Example #24
import pandas as pd
import numpy as np
import seaborn
from scipy.stats import poisson, skellam

## Dataset 
ice = pd.read_csv("C:/data/hockey.csv")
display(ice)
ice.columns
ice = ice[['Home','Visitor','G.1','G']]
ice = ice.rename(columns={'G.1': 'Home Goals', 'G': 'Away Goals'})

ice.mean()

## using Skellam statistics 
### probability of draw between home and away team
skellam.pmf(0, ice.mean()[0], ice.mean()[1])
### probability of home team winning by one goal
skellam.pmf(1, ice.mean()[0], ice.mean()[1])
### probability of home team winning by two goals
skellam.pmf(2, ice.mean()[0], ice.mean()[1])
### probability of home team losing by one goal
skellam.pmf(-1, ice.mean()[0], ice.mean()[1])

## importing the tools required for the Poisson regression model
import statsmodels.api as sm
import statsmodels.formula.api as smf

ice.head()
ice_h = ice[['Home','Visitor','Home Goals']]
ice_h.columns = ['team','opponent','goals']
ice_h['field'] = 'home'
Example #25
 def _skellam_pmf(x, mu0, mu1):
     """
     This is the probability mass function of the skellam distribution taken directly from the scipy stats package.
     """
     px = skellam.pmf(x, mu1=mu0, mu2=mu1, loc=0)
     return px
Example #26
def prob_win_change(n):
    z=np.zeros((n+1,4801))
    for i in np.arange(n+1):
        z[i,:]=skellam.pmf(i,new_mean,new_mean)*0.5
    return z
import numpy as np
import xgboost as xgb
from scipy.stats import skellam


def BuildPoissonXGBTree(hist_data, feature_list, comp_data=None):
    ''' Build score predictions via (tree-based) Poisson regression. '''

    dtrain_1 = xgb.DMatrix(data=np.matrix(hist_data[feature_list]),
                           label=np.array(hist_data["team_1_score"]),
                           feature_names=feature_list)

    dtrain_2 = xgb.DMatrix(data=np.matrix(hist_data[feature_list]),
                           label=np.array(hist_data["team_2_score"]),
                           feature_names=feature_list)

    param_1 = {
        'max_depth': 2,
        'eta': 0.1,
        'silent': 1,
        'objective': 'count:poisson'
    }
    param_1['nthread'] = 8
    param_1['eval_metric'] = 'poisson-nloglik'

    param_2 = {
        'max_depth': 2,
        'eta': 0.1,
        'silent': 1,
        'objective': 'count:poisson'
    }
    param_2['nthread'] = 8
    param_2['eval_metric'] = 'poisson-nloglik'

    #evallist_1 = [(dtrain, 'train'),(dtest, 'test')]
    evallist_1 = [(dtrain_1, 'train')]

    #evallist_2 = [(dtrain, 'train'),(dtest, 'test')]
    evallist_2 = [(dtrain_2, 'train')]

    num_round = 100
    bst_1 = xgb.train(param_1, dtrain_1, num_round, evallist_1)
    bst_2 = xgb.train(param_2, dtrain_2, num_round, evallist_2)

    ypred_1 = bst_1.predict(dtrain_1)
    ypred_2 = bst_2.predict(dtrain_2)

    hist_data["team_1_score_pred"] = ypred_1
    hist_data["team_2_score_pred"] = ypred_2

    #hist_data[['team_1_score','team_1_score_pred','team_2_score','team_2_score_pred']]
    if comp_data is None:
        return hist_data

    dcomp = xgb.DMatrix(data=np.matrix(comp_data[feature_list]),
                        feature_names=feature_list)

    # prepare comp data
    comp_data['team_1_score_pred'] = bst_1.predict(dcomp)
    comp_data['team_2_score_pred'] = bst_2.predict(dcomp)

    comp_data['team_1_prob'] = comp_data[[
        'team_1_score_pred', 'team_2_score_pred'
    ]].apply(
        lambda x: 1 - skellam.cdf(0, x['team_1_score_pred'], x[
            'team_2_score_pred']), 1)
    comp_data['team_tie_prob'] = comp_data[[
        'team_1_score_pred', 'team_2_score_pred'
    ]].apply(
        lambda x: skellam.pmf(0, x['team_1_score_pred'], x['team_2_score_pred']
                              ), 1)
    comp_data['team_2_prob'] = comp_data[[
        'team_1_score_pred', 'team_2_score_pred'
    ]].apply(
        lambda x: skellam.cdf(-1, x['team_1_score_pred'], x['team_2_score_pred'
                                                            ]), 1)

    return hist_data, comp_data
import pandas as pd
import numpy as np
import seaborn
from scipy.stats import poisson, skellam

# Gather and manipulate the data
epl_1718 = pd.read_csv('EPL DATA 2017-2018.csv')
epl_1718 = epl_1718[['HomeTeam','AwayTeam','FTHG','FTAG']]
epl_1718 = epl_1718.rename(columns={'FTHG': 'HomeGoals', 'FTAG':'AwayGoals'})
epl_1718.head()

# Since we're predicting the last round of matches, we need to remove the last 10 rows
epl_1718 = epl_1718[:-10]
epl_1718.mean()

# Probability of a draw between home and away team
skellam.pmf(0.0, epl_1718.mean()[0], epl_1718.mean()[1])

# Probability of Home team winning by one goal
skellam.pmf(1, epl_1718.mean()[0], epl_1718.mean()[1])

# Import some more tools for Poisson Regression
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Making the model
goal_model_data = pd.concat([epl_1718[['HomeTeam', 'AwayTeam', 'HomeGoals']].assign(home=1).rename(
        columns = {'HomeTeam':'team', 'AwayTeam':'opponent', 'HomeGoals':'goals'}),
    epl_1718[['AwayTeam', 'HomeTeam', 'AwayGoals']].assign(home=0).rename(
            columns = {'AwayTeam':'team', 'HomeTeam':'opponent', 'AwayGoals':'goals'})])

poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data,
Example #29
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson, skellam
import statsmodels.api as sm
import statsmodels.formula.api as smf
epl_1617 = pd.read_csv("http://www.football-data.co.uk/mmz4281/1617/E0.csv")
epl_1617 = epl_1617[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
epl_1617 = epl_1617.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
epl_1617.head()
epl_1617 = epl_1617[:-10]
epl_1617.mean()
skellam.pmf(0.0, epl_1617.mean()[0], epl_1617.mean()[1])
skellam.pmf(1, epl_1617.mean()[0], epl_1617.mean()[1])
goal_model_data = pd.concat([
    epl_1617[['HomeTeam', 'AwayTeam',
              'HomeGoals']].assign(home=1).rename(columns={
                  'HomeTeam': 'team',
                  'AwayTeam': 'opponent',
                  'HomeGoals': 'goals'
              }),
    epl_1617[['AwayTeam', 'HomeTeam',
              'AwayGoals']].assign(home=0).rename(columns={
                  'AwayTeam': 'team',
                  'HomeTeam': 'opponent',
                  'AwayGoals': 'goals'
              })
])

poisson_model = smf.glm(formula="goals ~ home + team + opponent",
                                                            'opponent': homeTeam,'home':0},
                                                      index=[1])).values[0]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
    return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))


epl_1617 = pd.read_csv("E0.csv")
epl_1617 = epl_1617[['HomeTeam','AwayTeam','FTHG','FTAG']]
epl_1617 = epl_1617.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
print(epl_1617.head())

epl_1617 = epl_1617[:-10]
print(epl_1617.mean())


print(skellam.pmf(0.0,  epl_1617.mean()[0],  epl_1617.mean()[1]))

print(skellam.pmf(1,  epl_1617.mean()[0],  epl_1617.mean()[1]))



goal_model_data = pd.concat([epl_1617[['HomeTeam','AwayTeam','HomeGoals']].assign(home=1).rename(
            columns={'HomeTeam':'team', 'AwayTeam':'opponent','HomeGoals':'goals'}),
           epl_1617[['AwayTeam','HomeTeam','AwayGoals']].assign(home=0).rename(
            columns={'AwayTeam':'team', 'HomeTeam':'opponent','AwayGoals':'goals'})])

poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data, 
                        family=sm.families.Poisson()).fit()
print(poisson_model.summary())

a=poisson_model.predict(pd.DataFrame(data={'team': 'Chelsea', 'opponent': 'Sunderland',