Example #1
0
def scrape_matches():
    """Scrape NHL game results from hockey-reference.com into the 'raw' table.

    For each season produced by ``range(2017, 2019)`` the season's games page
    is loaded with ``load_page``, every ``<tr>`` in the body of the first
    ``stats_table`` is turned into a game dict, and the game is stored with
    ``db.insert_game('raw', game)`` when ``db.check_game(game)`` returns 0.

    ``team_a`` / ``a_score`` are read from data cells 2 and 3 of the row and
    ``team_b`` / ``b_score`` from cells 0 and 1; ``outcome`` is 1 when
    ``a_score`` is numerically greater than ``b_score`` and 0 otherwise.

    Rows that cannot describe a finished game (no date header cell, no link in
    that cell, fewer than four data cells, or scores that are not integers)
    are skipped so that one odd row does not abort the whole scrape. A season
    page without the expected table is reported and skipped.

    Returns:
        None. Results are only written through ``db``.
    """
    for season in range(2017, 2019):
        print(season)
        soup = load_page('https://www.hockey-reference.com/leagues/NHL_'+str(season)+'_games.html')

        match_table = soup.find("table", {"class": 'stats_table'})
        tbody = match_table.find('tbody') if match_table is not None else None
        if tbody is None:
            print("No games table found for season", season)
            continue

        for tr in tbody.find_all('tr'):
            th = tr.find('th', {"class": 'left'})
            tds = tr.find_all('td')

            # A row without a date cell or with too few data cells (for
            # example a repeated header row) cannot be indexed below.
            if th is None or len(tds) < 4:
                continue

            link = th.find('a', href=True)
            if link is None:
                continue

            a_score = tds[3].text
            b_score = tds[1].text

            try:
                outcome = 1 if int(a_score) > int(b_score) else 0
            except ValueError:
                # Blank or non-numeric score text: no result to record.
                continue

            # NOTE(review): the pattern '(.*?)' can only match the empty
            # string, so these re.sub calls leave the text unchanged. If the
            # intent was to strip a parenthesised suffix, the parentheses need
            # escaping -- confirm against clean_name and stored names first.
            game = {
                'team_a': clean_name(re.sub('(.*?)', '', tds[2].text)),
                'team_b': clean_name(re.sub('(.*?)', '', tds[0].text)),
                'a_score': a_score,
                'b_score': b_score,
                'outcome': outcome,
                'date': th.text,
                'season': season,
                'stats_url': link['href'],
            }

            if db.check_game(game) == 0:
                db.insert_game('raw', game)
Example #2
0
def make_training_set():
    """Rebuild the 'games' table from the rows of the 'processed' table.

    The 'games' table is cleared, every row returned by
    ``db.get_all('processed', 'id')`` is converted into a feature dict and
    written with ``db.insert_game('games', ...)``. Most features are the
    result of ``stat_avg_diff`` applied to a team-A total, team-A game count,
    team-B total and team-B game count; ``elo`` and ``momentum`` are plain
    differences, and ``ts``, ``outcome`` and ``date`` are copied through.

    Returns:
        None.
    """

    def per_game_diff(row, stat):
        # stat_avg_diff over the A/B totals of `stat`, each paired with that
        # side's game count.
        return stat_avg_diff(row['a_' + stat], row['a_games'],
                             row['b_' + stat], row['b_games'])

    print("\n\nCreating Training Set")
    db.clear_table('games')
    processed = db.get_all('processed', 'id')
    progress = progressbar.ProgressBar(max_value=len(processed))

    for done, row in enumerate(processed):
        progress.update(done)

        # Both head-to-head records share the same denominator: their sum.
        head_to_head_total = row['a_vs_record'] + row['b_vs_record']

        features = {
            'wins': per_game_diff(row, 'win'),
            'map_score': stat_avg_diff(row['a_map_win'], row['a_map_played'],
                                       row['b_map_win'], row['b_map_played']),
            'elo': round(row['a_elo'] - row['b_elo'], 2),
            'vs': stat_avg_diff(row['a_vs_record'], head_to_head_total,
                                row['b_vs_record'], head_to_head_total),
            'score': per_game_diff(row, 'score'),
            'momentum': row['a_momentum'] - row['b_momentum'],
            'kd': per_game_diff(row, 'kd'),
            'kast': per_game_diff(row, 'kast'),
            'rating': per_game_diff(row, 'rating'),
            'adr': per_game_diff(row, 'adr'),
            'ts': row['a_trueskill'],
            'outcome': row['outcome'],
            'date': row['date'],
        }

        db.insert_game('games', features)
Example #3
0
def process_totals():
    """Rebuild the 'processed' table from the raw games, season by season.

    The 'processed' table is cleared once. For every season in
    ``range(1990, 2019)`` a fresh team table is created with
    ``setup_teams()`` and that season's raw games are read with
    ``db.get_all('raw', 'id', 'DESC', season)``. Games whose two scores are
    equal are ignored. For every other game:

    * if both teams already have more than ``MIN_GAMES_PLAYED`` games, a
      snapshot of their running totals *before* this game is inserted into
      'processed' (team A's Elo is reported with ``HOME_ADV`` added);
    * the running totals (games, score, score against) are then updated, and
      the winner/loser ratings, head-to-head record and momentum are adjusted.
      ``wins`` accumulates the score margin rather than a count.

    Any exception raised while handling a game is printed together with the
    game and processing continues with the next one.

    Returns:
        The team table of the last season processed.
    """
    db.clear_table('processed')
    for season in range(1990, 2019):
        print(season)
        teams = setup_teams()
        games = db.get_all('raw', 'id', 'DESC', season)
        # NOTE(review): this environment is bound to a local that nothing
        # below reads; the rating call goes through trueskill.rate_1vs1.
        # Confirm whether draw_probability=0 was meant to apply to it.
        ts = trueskill.TrueSkill(draw_probability=0)

        for g in games:
            try:
                # Tied scores carry no winner, so they are left out entirely.
                if g['a_score'] == g['b_score']:
                    continue

                home, away = g['team_a'], g['team_b']
                home_stats = teams[home]['stats']

                if home_stats['games'] > MIN_GAMES_PLAYED and teams[away]['stats']['games'] > MIN_GAMES_PLAYED:
                    away_stats = teams[away]['stats']
                    # Totals as they stood before this game is counted.
                    snapshot = {
                        'team_a': home,
                        'team_b': away,
                        'a_score': home_stats['score'],
                        'b_score': away_stats['score'],
                        'a_score_against': home_stats['score_against'],
                        'b_score_against': away_stats['score_against'],
                        'a_elo': home_stats['elo'] + HOME_ADV,
                        'b_elo': away_stats['elo'],
                        'a_games': home_stats['games'],
                        'b_games': away_stats['games'],
                        'a_win': home_stats['wins'],
                        'b_win': away_stats['wins'],
                        'a_vs_record': teams[home]['teams'][away],
                        'b_vs_record': teams[away]['teams'][home],
                        'a_momentum': home_stats['momentum'],
                        'b_momentum': away_stats['momentum'],
                        'a_trueskill': win_probability(home_stats['ts'], away_stats['ts']),
                        'b_trueskill': win_probability(away_stats['ts'], home_stats['ts']),
                        'outcome': g['outcome'],
                        'season': season,
                    }
                    db.insert_game('processed', snapshot)

                # Team A's count is bumped before team B is looked up, which
                # keeps the update order of the running totals unchanged.
                home_stats['games'] += 1
                away_stats = teams[away]['stats']
                away_stats['games'] += 1

                home_stats['score'] += g['a_score']
                away_stats['score'] += g['b_score']

                home_stats['score_against'] += g['b_score']
                away_stats['score_against'] += g['a_score']

                if g['outcome'] == 1:
                    victor, beaten = home, away
                    victor_stats, beaten_stats = home_stats, away_stats
                    top_score, low_score = g['a_score'], g['b_score']
                else:
                    victor, beaten = away, home
                    victor_stats, beaten_stats = away_stats, home_stats
                    top_score, low_score = g['b_score'], g['a_score']

                victor_stats['ts'], beaten_stats['ts'] = trueskill.rate_1vs1(
                    victor_stats['ts'], beaten_stats['ts'])
                victor_stats['elo'], beaten_stats['elo'] = elo(
                    victor_stats['elo'], beaten_stats['elo'])
                # 'wins' grows by the margin of victory, not by one.
                victor_stats['wins'] += (top_score - low_score)
                teams[victor]['teams'][beaten] += 1
                beaten_stats['momentum'] = round(
                    beaten_stats['momentum'] / MOMENTUM_DEGRADE, 4)
                victor_stats['momentum'] += (1 + math.log(top_score - low_score))

            except Exception as e:
                print(g)
                print(e)

    return teams
Example #4
0
def process_totals():
    """Rebuild the 'processed' table from all raw games.

    The 'processed' table is cleared, a team table is created with
    ``setup_teams()`` and every game from ``db.get_all('raw', 'id', 'DESC')``
    is handled in the order returned. Games whose two scores are equal are
    ignored. For every other game:

    * if both teams already have more than ``MIN_GAMES`` games, a snapshot of
      their running totals *before* this game is inserted into 'processed';
    * the running totals (games, score, per-map games, adr, rating, kills,
      deaths, kast) are updated;
    * the winner's and loser's TrueSkill and Elo ratings, win and map-win
      totals, head-to-head records, rating bonus and momentum are adjusted.

    Head-to-head records are kept within ``0 .. MAX_VS_MATCHES``: the loser's
    record against the winner drops by one (not below 0) and the winner's
    record against the loser rises by one (not above ``MAX_VS_MATCHES``).

    Both teams and the win margin are resolved before any total is modified,
    so a game that references an unknown team, or whose ``outcome`` disagrees
    with its scores, is reported and skipped without a partial update.

    Any exception raised while handling a game is printed and processing
    continues with the next game.

    Returns:
        The team table after all games have been processed.
    """
    db.clear_table('processed')
    teams = setup_teams()
    games = db.get_all('raw', 'id', 'DESC')
    # NOTE(review): this environment is bound to a local that nothing below
    # reads; the rating call goes through trueskill.rate_1vs1. Confirm
    # whether draw_probability=0 was meant to apply to it.
    ts = trueskill.TrueSkill(draw_probability=0)

    bar = progressbar.ProgressBar(max_value=len(games))
    print("\nProcessing Team Totals:")
    for cnt, g in enumerate(games):
        try:
            bar.update(cnt)

            # Tied scores carry no winner, so they are left out entirely.
            if g['a_score'] == g['b_score']:
                continue

            a_name, b_name = g['team_a'], g['team_b']
            game_map = g['map']
            a_team, b_team = teams[a_name], teams[b_name]
            a_stats, b_stats = a_team['stats'], b_team['stats']

            if g['outcome'] == 1:
                win_name, lose_name = a_name, b_name
                winner, loser = a_team, b_team
                win_score, lose_score = g['a_score'], g['b_score']
            else:
                win_name, lose_name = b_name, a_name
                winner, loser = b_team, a_team
                win_score, lose_score = g['b_score'], g['a_score']
            win_stats, lose_stats = winner['stats'], loser['stats']

            # Credit for a win grows with the log of the margin. Computed up
            # front so an inconsistent row fails before any total changes.
            win_weight = 1 + math.log(win_score - lose_score)

            if a_stats['games'] > MIN_GAMES and b_stats['games'] > MIN_GAMES:
                # Totals as they stood before this game is counted.
                match = {
                    'team_a': a_name,
                    'team_b': b_name,
                    'a_score': a_stats['score'],
                    'b_score': b_stats['score'],
                    'a_elo': a_stats['elo'],
                    'b_elo': b_stats['elo'],
                    'a_games': a_stats['games'],
                    'b_games': b_stats['games'],
                    'a_win': a_stats['wins'],
                    'b_win': b_stats['wins'],
                    'a_map_win': a_team['map_wins'][game_map],
                    'b_map_win': b_team['map_wins'][game_map],
                    'a_map_played': a_team['map_games'][game_map],
                    'b_map_played': b_team['map_games'][game_map],
                    'a_vs_record': a_team['teams'][b_name],
                    'b_vs_record': b_team['teams'][a_name],
                    'a_momentum': a_stats['momentum'],
                    'b_momentum': b_stats['momentum'],
                    'a_adr': a_stats['adr'],
                    'b_adr': b_stats['adr'],
                    'a_kast': a_stats['kast'],
                    'b_kast': b_stats['kast'],
                    'a_kd': round(a_stats['kills'] / a_stats['deaths'], 4),
                    'b_kd': round(b_stats['kills'] / b_stats['deaths'], 4),
                    'a_rating': a_stats['rating'],
                    'b_rating': b_stats['rating'],
                    'a_trueskill': win_probability(a_stats['ts'], b_stats['ts']),
                    'b_trueskill': win_probability(b_stats['ts'], a_stats['ts']),
                    'outcome': g['outcome'],
                    'date': g['date'],
                }

                db.insert_game('processed', match)

            a_stats['games'] += 1
            b_stats['games'] += 1

            a_stats['score'] += g['a_score']
            b_stats['score'] += g['b_score']

            a_team['map_games'][game_map] += 1
            b_team['map_games'][game_map] += 1

            # Per-game stats are accumulated as running sums for each side.
            for stat in ('adr', 'rating', 'kills', 'deaths', 'kast'):
                a_stats[stat] += g['a_' + stat]
                b_stats[stat] += g['b_' + stat]

            win_stats['ts'], lose_stats['ts'] = trueskill.rate_1vs1(
                win_stats['ts'], lose_stats['ts'])
            win_stats['elo'], lose_stats['elo'] = elo(
                win_stats['elo'], lose_stats['elo'])
            win_stats['wins'] += win_weight

            # Head-to-head: the loser gives one back (floor 0), the winner
            # gains one against the loser (ceiling MAX_VS_MATCHES).
            loser['teams'][win_name] = max(loser['teams'][win_name] - 1, 0)
            winner['teams'][lose_name] = min(
                winner['teams'][lose_name] + 1, MAX_VS_MATCHES)

            winner['map_wins'][game_map] += win_weight
            win_stats['rating'] += WIN_RATING_SCORE
            lose_stats['momentum'] = round(
                lose_stats['momentum'] / LOSS_MOMENTUM, 4)
            win_stats['momentum'] += 1

        except Exception as e:
            print("### Error:", e)

    return teams
Example #5
0
def new_team_check(team_name):
    """Insert ``team_name`` into the 'teams' table if it is not known yet.

    A team counts as unknown when ``db.check_team_slug(team_name)`` returns a
    value below 1; in that case ``{'team': team_name}`` is written with
    ``db.insert_game('teams', ...)``.

    Args:
        team_name: Value passed to ``db.check_team_slug`` and stored under
            the 'team' key.

    Returns:
        None.
    """
    is_unknown = db.check_team_slug(team_name) < 1
    if is_unknown:
        db.insert_game('teams', {'team': team_name})
Example #6
0
def find_new_games():
    """Scrape games and store them in the 'raw' table in reversed order.

    The list returned by ``scrape_matches()`` is reversed in place, its
    length is printed, and each entry is written with
    ``db.insert_game('raw', ...)``.

    Returns:
        None.
    """
    scraped = scrape_matches()
    # Reversed in place so the inserts run in the opposite order to the one
    # in which scrape_matches() returned the games.
    scraped.reverse()
    print("New Games:", len(scraped))
    for entry in scraped:
        db.insert_game('raw', entry)