def scrape_matches():
    """Scrape NHL results from hockey-reference.com and store new games in 'raw'.

    For each season in ``range(2017, 2019)`` the season's games page is loaded
    and every row of the ``stats_table`` body is turned into a game dict. A
    game is inserted only when ``db.check_game(game)`` returns 0.

    Cell mapping used below: cells 0 and 1 hold team B's name and score,
    cells 2 and 3 hold team A's name and score; the row header holds the date
    text and a link that is stored as ``stats_url``. ``outcome`` is 1 when
    team A's score is strictly greater than team B's, otherwise 0.

    Rows that cannot describe a finished game (fewer than four cells, no
    link in the row header, non-integer scores) are skipped so that one such
    row does not abort the whole scrape.
    """
    # The parentheses must be escaped: an unescaped '(.*?)' is a lazy group
    # that only ever matches the empty string, so nothing would be removed
    # from the team name.
    parenthesised = re.compile(r'\(.*?\)')

    for season in range(2017, 2019):
        print(season)
        soup = load_page(
            'https://www.hockey-reference.com/leagues/NHL_'
            + str(season) + '_games.html')
        match_table = soup.find("table", {"class": 'stats_table'})
        tbody = match_table.find('tbody')

        for tr in tbody.find_all('tr'):
            th = tr.find('th', {"class": 'left'})
            tds = tr.find_all('td')
            if th is None or len(tds) < 4:
                # Not a game row (e.g. a header or spacer row inside tbody).
                continue

            link = th.find('a', href=True)
            if link is None:
                continue

            a_score = tds[3].text
            b_score = tds[1].text
            try:
                outcome = 1 if int(a_score) > int(b_score) else 0
            except ValueError:
                # Score cells are not integers; presumably the game has not
                # been played yet.
                continue

            game = {
                'team_a': clean_name(parenthesised.sub('', tds[2].text)),
                'team_b': clean_name(parenthesised.sub('', tds[0].text)),
                'a_score': a_score,
                'b_score': b_score,
                'outcome': outcome,
                'date': th.text,
                'season': season,
                'stats_url': link['href'],
            }
            if db.check_game(game) == 0:
                db.insert_game('raw', game)
def make_training_set():
    """Rebuild the 'games' table from the rows of the 'processed' table.

    The 'games' table is cleared first. Every processed row is then turned
    into one feature row: most features come from ``stat_avg_diff`` applied
    to team A's and team B's totals and games played, 'elo' and 'momentum'
    are plain A-minus-B differences, and 'ts' is team A's stored trueskill
    value. 'outcome' and 'date' are copied through unchanged.
    """
    print("\n\nCreating Training Set")
    db.clear_table('games')
    processed = db.get_all('processed', 'id')
    progress = progressbar.ProgressBar(max_value=len(processed))

    for position, row in enumerate(processed):
        progress.update(position)

        # Both head-to-head terms share one denominator: the combined record.
        head_to_head = row['a_vs_record'] + row['b_vs_record']

        features = {}
        features['wins'] = stat_avg_diff(
            row['a_win'], row['a_games'], row['b_win'], row['b_games'])
        features['map_score'] = stat_avg_diff(
            row['a_map_win'], row['a_map_played'],
            row['b_map_win'], row['b_map_played'])
        features['elo'] = round(row['a_elo'] - row['b_elo'], 2)
        features['vs'] = stat_avg_diff(
            row['a_vs_record'], head_to_head,
            row['b_vs_record'], head_to_head)
        features['score'] = stat_avg_diff(
            row['a_score'], row['a_games'], row['b_score'], row['b_games'])
        features['momentum'] = row['a_momentum'] - row['b_momentum']
        features['kd'] = stat_avg_diff(
            row['a_kd'], row['a_games'], row['b_kd'], row['b_games'])
        features['kast'] = stat_avg_diff(
            row['a_kast'], row['a_games'], row['b_kast'], row['b_games'])
        features['rating'] = stat_avg_diff(
            row['a_rating'], row['a_games'], row['b_rating'], row['b_games'])
        features['adr'] = stat_avg_diff(
            row['a_adr'], row['a_games'], row['b_adr'], row['b_games'])
        features['ts'] = row['a_trueskill']
        features['outcome'] = row['outcome']
        features['date'] = row['date']

        db.insert_game('games', features)
def process_totals():
    """Rebuild the 'processed' table from the raw games, one season at a time.

    The 'processed' table is cleared, then for every season in
    ``range(1990, 2019)`` the team totals are reset with ``setup_teams()``
    and that season's raw games are replayed in the order ``db.get_all``
    returns them. For each game whose two scores differ:

    * if both teams have more than ``MIN_GAMES_PLAYED`` games, a snapshot of
      their totals *before* this game is written to 'processed' (team A's
      elo is stored with ``HOME_ADV`` added);
    * the totals are then updated with the game's result.

    Games with equal scores are ignored. Any exception raised while handling
    a game is printed together with the game and processing continues with
    the next one.

    Returns:
        The team totals dict of the last season processed.
    """
    # NOTE(review): ratings go through the module-level trueskill.rate_1vs1;
    # an unused TrueSkill(draw_probability=0) instance used to be created
    # here - confirm whether a zero-draw environment was intended.
    db.clear_table('processed')

    for season in range(1990, 2019):
        print(season)
        teams = setup_teams()
        games = db.get_all('raw', 'id', 'DESC', season)

        for g in games:
            try:
                if g['a_score'] == g['b_score']:
                    continue

                name_a, name_b = g['team_a'], g['team_b']
                # Resolve both teams before touching any totals, so an
                # unknown team cannot leave the other one half-updated.
                team_a, team_b = teams[name_a], teams[name_b]
                stats_a, stats_b = team_a['stats'], team_b['stats']

                if (stats_a['games'] > MIN_GAMES_PLAYED
                        and stats_b['games'] > MIN_GAMES_PLAYED):
                    match = {
                        'team_a': name_a,
                        'team_b': name_b,
                        'a_score': stats_a['score'],
                        'b_score': stats_b['score'],
                        'a_score_against': stats_a['score_against'],
                        'b_score_against': stats_b['score_against'],
                        'a_elo': stats_a['elo'] + HOME_ADV,
                        'b_elo': stats_b['elo'],
                        'a_games': stats_a['games'],
                        'b_games': stats_b['games'],
                        'a_win': stats_a['wins'],
                        'b_win': stats_b['wins'],
                        'a_vs_record': team_a['teams'][name_b],
                        'b_vs_record': team_b['teams'][name_a],
                        'a_momentum': stats_a['momentum'],
                        'b_momentum': stats_b['momentum'],
                        'a_trueskill': win_probability(
                            stats_a['ts'], stats_b['ts']),
                        'b_trueskill': win_probability(
                            stats_b['ts'], stats_a['ts']),
                        'outcome': g['outcome'],
                        'season': season,
                    }
                    db.insert_game('processed', match)

                stats_a['games'] += 1
                stats_b['games'] += 1
                stats_a['score'] += g['a_score']
                stats_b['score'] += g['b_score']
                stats_a['score_against'] += g['b_score']
                stats_b['score_against'] += g['a_score']

                if g['outcome'] == 1:
                    winner, loser, loser_name = team_a, team_b, name_b
                    win_score, lose_score = g['a_score'], g['b_score']
                else:
                    winner, loser, loser_name = team_b, team_a, name_a
                    win_score, lose_score = g['b_score'], g['a_score']

                win_stats, lose_stats = winner['stats'], loser['stats']
                margin = win_score - lose_score

                win_stats['ts'], lose_stats['ts'] = trueskill.rate_1vs1(
                    win_stats['ts'], lose_stats['ts'])
                win_stats['elo'], lose_stats['elo'] = elo(
                    win_stats['elo'], lose_stats['elo'])
                # 'wins' grows by the score margin, not by one per win.
                win_stats['wins'] += margin
                winner['teams'][loser_name] += 1
                lose_stats['momentum'] = round(
                    lose_stats['momentum'] / MOMENTUM_DEGRADE, 4)
                # math.log raises ValueError if the margin is not positive,
                # i.e. when 'outcome' disagrees with the scores.
                win_stats['momentum'] += 1 + math.log(margin)
            except Exception as e:
                # Best effort: report the offending game and carry on.
                print(g)
                print(e)

    return teams
def process_totals():
    """Rebuild the 'processed' table by replaying every raw game.

    The 'processed' table is cleared, team totals are initialised with
    ``setup_teams()`` and the raw games are replayed in the order
    ``db.get_all`` returns them. For each game whose two scores differ:

    * if both teams have more than ``MIN_GAMES`` games, a snapshot of their
      totals *before* this game is written to 'processed';
    * the totals (games, score, per-map games, adr, rating, kills, deaths,
      kast) are then updated, followed by the winner/loser adjustments
      (trueskill, elo, wins, head-to-head record, map wins, rating bonus,
      momentum).

    Games with equal scores are ignored. Any exception raised while handling
    a game is printed and processing continues with the next game.

    Returns:
        The team totals dict after all games have been replayed.
    """
    # NOTE(review): ratings go through the module-level trueskill.rate_1vs1;
    # an unused TrueSkill(draw_probability=0) instance used to be created
    # here - confirm whether a zero-draw environment was intended.
    db.clear_table('processed')
    teams = setup_teams()
    games = db.get_all('raw', 'id', 'DESC')
    bar = progressbar.ProgressBar(max_value=len(games))
    print("\nProcessing Team Totals:")

    for cnt, g in enumerate(games):
        bar.update(cnt)
        try:
            if g['a_score'] == g['b_score']:
                continue

            name_a, name_b = g['team_a'], g['team_b']
            played_map = g['map']
            # Resolve both teams before touching any totals, so an unknown
            # team cannot leave the other one half-updated.
            team_a, team_b = teams[name_a], teams[name_b]
            stats_a, stats_b = team_a['stats'], team_b['stats']

            if stats_a['games'] > MIN_GAMES and stats_b['games'] > MIN_GAMES:
                match = {
                    'team_a': name_a,
                    'team_b': name_b,
                    'a_score': stats_a['score'],
                    'b_score': stats_b['score'],
                    'a_elo': stats_a['elo'],
                    'b_elo': stats_b['elo'],
                    'a_games': stats_a['games'],
                    'b_games': stats_b['games'],
                    'a_win': stats_a['wins'],
                    'b_win': stats_b['wins'],
                    'a_map_win': team_a['map_wins'][played_map],
                    'b_map_win': team_b['map_wins'][played_map],
                    'a_map_played': team_a['map_games'][played_map],
                    'b_map_played': team_b['map_games'][played_map],
                    'a_vs_record': team_a['teams'][name_b],
                    'b_vs_record': team_b['teams'][name_a],
                    'a_momentum': stats_a['momentum'],
                    'b_momentum': stats_b['momentum'],
                    'a_adr': stats_a['adr'],
                    'b_adr': stats_b['adr'],
                    'a_kast': stats_a['kast'],
                    'b_kast': stats_b['kast'],
                    'a_kd': round(stats_a['kills'] / stats_a['deaths'], 4),
                    'b_kd': round(stats_b['kills'] / stats_b['deaths'], 4),
                    'a_rating': stats_a['rating'],
                    'b_rating': stats_b['rating'],
                    'a_trueskill': win_probability(
                        stats_a['ts'], stats_b['ts']),
                    'b_trueskill': win_probability(
                        stats_b['ts'], stats_a['ts']),
                    'outcome': g['outcome'],
                    'date': g['date'],
                }
                db.insert_game('processed', match)

            stats_a['games'] += 1
            stats_b['games'] += 1
            stats_a['score'] += g['a_score']
            stats_b['score'] += g['b_score']
            team_a['map_games'][played_map] += 1
            team_b['map_games'][played_map] += 1
            stats_a['adr'] += g['a_adr']
            stats_b['adr'] += g['b_adr']
            stats_a['rating'] += g['a_rating']
            stats_b['rating'] += g['b_rating']
            stats_a['kills'] += g['a_kills']
            stats_b['kills'] += g['b_kills']
            stats_a['deaths'] += g['a_deaths']
            stats_b['deaths'] += g['b_deaths']
            stats_a['kast'] += g['a_kast']
            stats_b['kast'] += g['b_kast']

            if g['outcome'] == 1:
                winner, loser = team_a, team_b
                winner_name, loser_name = name_a, name_b
                win_score, lose_score = g['a_score'], g['b_score']
            else:
                winner, loser = team_b, team_a
                winner_name, loser_name = name_b, name_a
                win_score, lose_score = g['b_score'], g['a_score']

            win_stats, lose_stats = winner['stats'], loser['stats']

            win_stats['ts'], lose_stats['ts'] = trueskill.rate_1vs1(
                win_stats['ts'], lose_stats['ts'])
            win_stats['elo'], lose_stats['elo'] = elo(
                win_stats['elo'], lose_stats['elo'])

            # math.log raises ValueError if the margin is not positive,
            # i.e. when 'outcome' disagrees with the scores.
            win_bonus = 1 + math.log(win_score - lose_score)
            win_stats['wins'] += win_bonus

            # Head-to-head record: the loser's record against the winner
            # drops by one (never below zero); the winner's record against
            # the loser is capped and then incremented, so it is at most
            # MAX_VS_MATCHES + 1 after a win.
            loser['teams'][winner_name] = max(
                loser['teams'][winner_name] - 1, 0)
            winner['teams'][loser_name] = min(
                winner['teams'][loser_name], MAX_VS_MATCHES)
            # Credit the win against the loser. Indexing by g['team_a'] here
            # would hit the winner's own name whenever team A wins.
            winner['teams'][loser_name] += 1

            winner['map_wins'][played_map] += win_bonus
            win_stats['rating'] += WIN_RATING_SCORE
            lose_stats['momentum'] = round(
                lose_stats['momentum'] / LOSS_MOMENTUM, 4)
            win_stats['momentum'] += 1
        except Exception as e:
            # Best effort: report the problem and carry on with the next game.
            print("### Error:", e)

    return teams
def new_team_check(team_name):
    """Add ``team_name`` to the 'teams' table unless it is already known.

    The team is inserted when ``db.check_team_slug(team_name)`` returns a
    value below 1 (presumably a count of matching rows - confirm against
    the db module).
    """
    matches = db.check_team_slug(team_name)
    is_new = matches < 1
    if not is_new:
        return
    db.insert_game('teams', {'team': team_name})
def find_new_games():
    """Insert the games returned by ``scrape_matches()`` into the 'raw' table.

    The scraped games are inserted in the reverse of the order in which
    ``scrape_matches()`` returned them, after printing how many there are.
    """
    # scrape_matches() can return None (a scraper that stores its games
    # itself has nothing to hand back); treat that as "no new games" rather
    # than failing on None.reverse().
    scraped = scrape_matches() or []
    new_games = list(reversed(scraped))
    print("New Games:", len(new_games))
    for game in new_games:
        db.insert_game('raw', game)