コード例 #1
0
ファイル: driver.py プロジェクト: Engy-22/BaseballSync
def consolidate_data(year):
    """Consolidate the per-team roster data of one season.

    Every ``team_years`` row for ``year`` is processed in turn: its hitter
    spots, player positions and batting/pitching/fielding stats are
    consolidated and handed to ``write_roster_info``.  Progress and timings
    are written to ``logger`` and ``driver_logger``.

    Args:
        year: Season to consolidate.  The value is concatenated into the SQL
            text, so it is expected to be an integer year.
    """
    driver_logger.log("\tConsolidating data")
    print("Consolidating data")
    start_time = time.time()
    logger.log("Consolidating team data || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    db = DatabaseConnection(sandbox_mode)
    try:
        # Fetch the identifier and the team id together so that no extra
        # query per team is needed just to log which team is being handled.
        team_rows = db.read(
            'select ty_uniqueidentifier, teamId from team_years where year = '
            + str(year) + ';')
    finally:
        # Release the connection even when the read raises.
        db.close()
    for team_row in team_rows:
        ty_uid, team_id = team_row[0], team_row[1]
        team_start_time = time.time()
        logger.log('\t' + team_id)
        write_roster_info(
            ty_uid, {
                'hitter_spots':
                consolidate_hitter_spots(ty_uid),
                'player_positions':
                consolidate_player_positions(ty_uid),
                'batter_stats':
                consolidate_player_stats(ty_uid, 'batting', year),
                'pitcher_stats':
                consolidate_player_stats(ty_uid, 'pitching', year),
                'fielder_stats':
                consolidate_player_stats(ty_uid, 'fielding', year)
            })
        logger.log('\t\tTime = ' +
                   time_converter(time.time() - team_start_time))
    total_time = time_converter(time.time() - start_time)
    logger.log("Done consolidating team data: Time = " + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #2
0
def simulation(away_team_id, away_year, away_year_info, home_team_id,
               home_year, home_year_info, games):
    """Simulate a series between two teams and return the series winner.

    Builds one ``Team`` object per side, plays ``games`` games through
    ``simulate_game`` and tallies the wins.  A game whose ``'winner'`` is not
    the away team's id is counted for the home team.

    Args:
        away_team_id: Id used to build the away ``Team``.
        away_year: Season of the away team.
        away_year_info: Passed through unchanged to ``simulate_game``.
        home_team_id: Id used to build the home ``Team``.
        home_year: Season of the home team.
        home_year_info: Passed through unchanged to ``simulate_game``.
        games: Number of games to play.

    Returns:
        Whatever ``determine_series_winner`` returns for the final tally.
    """
    start_time = time.time()
    clear_logs('controller')

    team_object_time = time.time()
    logger.log("Creating team objects")
    away_team = Team(away_team_id, away_year)
    home_team = Team(home_team_id, home_year)
    construction_elapsed = time.time() - team_object_time
    logger.log("\t" + time_converter(construction_elapsed))

    strike_zone = {axis: strike_zone_coordinate(axis) for axis in ('x', 'y')}
    wins = {'away': 0, 'home': 0}
    for game_number in range(1, games + 1):
        game_data = simulate_game(
            game_number, away_team, away_team.get_team_info(),
            away_year_info, home_team, home_team.get_team_info(),
            home_year_info, away_year, home_year, strike_zone)
        away_won = game_data['winner'] == away_team.get_team_id()
        wins['away' if away_won else 'home'] += 1

    elapsed = time.time() - start_time
    logger.log('Simulation complete: Time = ' + time_converter(elapsed))
    return determine_series_winner(away_team, wins['away'], home_team,
                                   wins['home'], games)
コード例 #3
0
def pitcher_spray_chart_constructor(year):
    """Download the pitcher spray charts of one season.

    Resets the module-level ``bad_gateway_data`` list, then (for 1988 and
    later) submits one ``reduce_functionality`` job per pitcher row whose
    ``pa_infield`` is still NULL.  Entries collected in ``bad_gateway_data``
    during the run are handed to ``revisit_bad_gateways`` afterwards.
    Seasons before 1988 are only logged and skipped.

    Args:
        year: Season to download.
    """
    global bad_gateway_data
    print("creating pitcher spray charts")
    start_time = time.time()
    bad_gateway_data = []
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) +
               " pitcher spray charts || Timestamp: " + timestamp)

    # Guard clause: nothing to download for early seasons.
    if year < 1988:
        driver_logger.log("\tNo pitcher spray chart data before 1988")
        logger.log("\tNo spray pitcher chart data before 1988")
        return

    driver_logger.log("\tCreating pitcher spray charts")
    query = ('select PT_uniqueidentifier from player_pitching where year = '
             + str(year) + ' and pa_infield is NULL;')
    db = DatabaseConnection(sandbox_mode)
    pt_uid_players = set(db.read(query))
    db.close()

    # Leaving the ``with`` block waits for every submitted job to finish.
    with ThreadPoolExecutor(os.cpu_count()) as executor:
        for ent in pt_uid_players:
            executor.submit(reduce_functionality, year, ent)
    driver_logger.log("\t\tTime = " +
                      time_converter(time.time() - start_time))

    if bad_gateway_data:
        revisit_bad_gateways(year, bad_gateway_data)
    logger.log("Done downloading pitcher spray charts: time = " +
               time_converter(time.time() - start_time) + '\n\n')
コード例 #4
0
def manager_tendencies(year):
    """Download, process and store the manager tendencies of one season.

    Three phases, each timed in the log:
      1. ``load_url`` is run on a thread pool for every (manager, team) pair
         of the season.
      2. ``process_manager_tendencies`` processes the downloaded data.
      3. ``write_to_file`` is run on a thread pool for every non-empty entry
         of the module-level ``stats`` mapping.

    Args:
        year: Season whose manager tendencies are stored.
    """
    global stats
    driver_logger.log("\tStoring manager tendencies")
    print("storing manager tendencies")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) +
               " manager tendencies || Timestamp: " + timestamp)

    # Phase 1: fetch one page per manager/team pair.
    logger.log('\tMaking HTTP requests')
    query = ('select manager_teams.managerid, manager_teams.teamid from manager_teams, manager_year where '
             'manager_year.year = ' + str(year) +
             ' and manager_year.mt_uniqueidentifier = manager_teams.'
             'mt_uniqueidentifier;')
    db = DatabaseConnection(sandbox_mode)
    managers = db.read(query)
    db.close()
    with ThreadPoolExecutor(os.cpu_count()) as executor:
        for manager_id, team_id in managers:
            executor.submit(load_url, manager_id, team_id)
    logger.log('\t\tTime = ' + time_converter(time.time() - start_time))

    # Phase 2: process what was downloaded.
    process_manager_tendencies(year)

    # Phase 3: persist every manager/team entry that has data.
    write_time = time.time()
    logger.log('\tWriting data to database')
    with ThreadPoolExecutor(os.cpu_count()) as executor2:
        for manager_team, tendencies in stats.items():
            if not tendencies:
                continue
            executor2.submit(write_to_file, year, manager_team, tendencies)
    logger.log('\t\tTime = ' + time_converter(time.time() - write_time))

    total_time = time_converter(time.time() - start_time)
    driver_logger.log("\t\tTime = " + total_time)
    logger.log("Done storing manager tendencies: time = " + total_time +
               '\n\n')
コード例 #5
0
def populate_teams_table(year):
    """Insert the teams of one season into the ``teams`` table.

    Reads ``../background/yearTeams.txt``, takes the first line containing
    ``str(year)`` and inserts one ``teams`` row per team listed on it (the
    first and last comma-separated fields of the line are ignored; the team
    id is the part of each field before the first ``;``).  The ``teamId``
    index is dropped before the inserts and re-created afterwards.

    Args:
        year: Season whose teams are inserted.
    """
    driver_logger.log('\tPopulating teams table')
    print("Populating teams table")
    start_time = time.time()
    logger.log('Begin populating teams table for ' + str(year) +
               ' || Timestamp: ' +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    year_text = str(year)
    with open(os.path.join("..", "background", "yearTeams.txt"), 'rt') as file:
        db = DatabaseConnection(sandbox_mode)
        try:
            db.write('ALTER TABLE teams DROP INDEX teamId;')
            try:
                for line in file:
                    if year_text not in line:
                        continue
                    for team in line.split(',')[1:-1]:
                        team_id = team.split(';')[0]
                        # The name is embedded in a double-quoted SQL string,
                        # so apostrophes need no escaping.  The previous
                        # ``.replace("'", "\'")`` replaced a quote with itself
                        # and has been dropped.
                        db.write('insert into teams (teamId, teamName) values ("' +
                                 team_id + '", "' +
                                 translate_team_name(team_id) +
                                 '");')
                    break  # only the first matching line is used
            finally:
                # Re-create the index even if an insert failed, so the table
                # is not left without it for the next run.
                db.write('ALTER TABLE teams ADD INDEX(teamId);')
        finally:
            db.close()
    total_time = time.time() - start_time
    logger.log('Populating teams table completed: ' +
               time_converter(total_time))
    driver_logger.log('\t\tTime = ' + time_converter(total_time))
コード例 #6
0
ファイル: rank_driver.py プロジェクト: Engy-22/BaseballSync
def rank_driver(year):
    """Compute per-season and all-time team rankings.

    First pass: for every season from ``year`` down to ``get_oldest_year()``
    the per-team runs scored, runs allowed and run difference are obtained
    from ``team_ranker_year``, together with the standard deviation of each
    list and the World Series winner.

    Second pass: ``team_ranker_ovr`` is called once each for the offensive,
    defensive and overall rankings, using the all-time runs per game and the
    mean of the per-season standard deviations.

    Args:
        year: Most recent season to include.
    """
    print("\n\ncalculating team ranks (year)")
    driver_logger.log("\tBeginning rank driver")
    start_time = time.time()
    logger.log("Beginning rank driver || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tCalculating team ranks (year)")
    runs = {}
    allowed = {}
    difference = {}
    # The deviation dictionaries are keyed by str(year) because
    # team_ranker_ovr looks them up that way.
    standard_deviation_for = {}
    standard_deviation_against = {}
    standard_deviation_ovr = {}
    ws_winners = {}
    driver_logger.log("\t\tCalculating team ranks (year)")
    for data_year in range(year, get_oldest_year() - 1, -1):
        runs[data_year], allowed[data_year], difference[
            data_year] = team_ranker_year(data_year)
        standard_deviation_for[str(data_year)] = stdev(
            [team_runs_for[1] for team_runs_for in runs[data_year]])
        standard_deviation_against[str(data_year)] = stdev(
            [team_runs_against[1] for team_runs_against in allowed[data_year]])
        standard_deviation_ovr[str(data_year)] = stdev(
            [team_runs_diff[1] for team_runs_diff in difference[data_year]])
        ws_winners[data_year] = get_ws_winner(data_year)
    total_time = time_converter(time.time() - start_time)
    logger.log("\t\tTime = " + total_time)
    driver_logger.log("\t\t\tTime = " + total_time)
    second_time = time.time()
    driver_logger.log("\t\tCalculating team ranks (overall)")
    logger.log("\tCalculating team ranks (overall)")
    print("calculating team ranks (overall)")
    # Materialised as lists so the calls work whatever mean() implementation
    # the module imports.
    average_deviation_for = mean(list(standard_deviation_for.values()))
    average_deviation_against = mean(
        list(standard_deviation_against.values()))
    average_deviation_diff = mean(list(standard_deviation_ovr.values()))
    all_time_rpg = get_all_time_rpg()
    team_ranker_ovr(runs, True, "offRank_ovr", all_time_rpg,
                    standard_deviation_for, average_deviation_for)
    team_ranker_ovr(allowed, False, "defRank_ovr", all_time_rpg,
                    standard_deviation_against, average_deviation_against)
    team_ranker_ovr(difference, True, "ovrRank_ovr", all_time_rpg,
                    standard_deviation_ovr, average_deviation_diff, ws_winners)
    second = time_converter(time.time() - second_time)
    logger.log("\t\tTime = " + second)
    driver_logger.log("\t\t\tTime = " + second)
    total_time = time_converter(time.time() - start_time)
    logger.log("Rank driver complete: time = " + total_time + '\n\n')
    driver_logger.log("\t\tRank driver time = " + total_time)
コード例 #7
0
def get_pitch_fx_data(year, month=None, day=None):
    """Download pitch fx data for a whole season or for a single day.

    With neither ``month`` nor ``day`` given, every calendar day from the
    season's opening day (read from the ``years`` table) through the end of
    November is fetched with ``get_day_data`` and the season is then
    aggregated with ``aggregate_pitch_fx(year)``.  Otherwise only the given
    day is fetched and ``aggregate_pitch_fx(year, month, day)`` is called.
    Seasons before 2008 are logged and skipped.

    Args:
        year: Season (integer) to download.
        month: Month of a single day to fetch, or ``None`` for the season.
        day: Day of a single day to fetch, or ``None`` for the season.
    """
    if year < 2008:
        driver_logger.log("\tNo pitch fx data to download before 2008")
        return
    start_time = time.time()
    if month is None and day is None:
        import calendar  # only needed for the whole-season walk

        driver_logger.log("\tFetching " + str(year) + " pitch fx data")
        print("Fetching " + str(year) + " pitch fx data")
        logger.log("Downloading pitch fx data for " + str(year) +
                   " || Timestamp: " +
                   datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
        db = DatabaseConnection(sandbox_mode)
        try:
            opening_day = db.read('select opening_day from years where year = ' +
                                  str(year) + ';')[0][0]
        finally:
            db.close()
        # opening_day is split on '-' with the month first and the day
        # second; parse it once instead of on every loop iteration.
        opening_parts = opening_day.split('-')
        opening_month = int(opening_parts[0])
        opening_date = int(opening_parts[1])
        # March is the earliest month considered, November the latest.
        for fetch_month in range(max(3, opening_month), 12):
            # Only real calendar days are requested (no April 31st etc.).
            last_day = calendar.monthrange(year, fetch_month)[1]
            if fetch_month == opening_month:
                first_day = max(1, opening_date)
            else:
                first_day = 1
            for fetch_day in range(first_day, last_day + 1):
                get_day_data(str(fetch_day).zfill(2),
                             str(fetch_month).zfill(2), str(year))
        logger.log("Done fetching " + str(year) + " pitch fx data: time = " +
                   time_converter(time.time() - start_time) + '\n\n\n\n')
        driver_logger.log("\t\tTime = " +
                          time_converter(time.time() - start_time))
        aggregate_pitch_fx(year)
    else:
        date_label = str(month) + "-" + str(day) + "-" + str(year)
        driver_logger.log("\tFetching " + date_label + " pitch fx data")
        print("Fetching " + date_label + " pitch fx data")
        logger.log("Downloading pitch fx data for " + date_label +
                   " || Timestamp: " +
                   datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
        get_day_data(str(day), str(month), str(year))
        driver_logger.log("\t\tTime = " +
                          time_converter(time.time() - start_time))
        aggregate_pitch_fx(year, month, day)
コード例 #8
0
def simulate_game(game_num, away_team, away_team_info, away_year_info,
                  home_team, home_team_info, home_year_info, away_year,
                  home_year, strike_zone):
    """Simulate one game and return its result.

    Picks the starting pitchers, sets both lineups, then plays innings with
    ``simulate_inning`` until at least nine innings are complete and the
    score is not tied.  After every inning the batting orders, lineup places
    and scores are updated from the inning's data.

    Args:
        game_num: Number of the game within the series.
        away_team, home_team: Team objects taking part in the game.
        away_team_info, home_team_info: Passed through to ``simulate_inning``.
        away_year_info, home_year_info: Passed through to ``simulate_inning``.
        away_year, home_year: Seasons used to pick the starting pitchers.
        strike_zone: Passed through to ``simulate_inning``.

    Returns:
        dict: ``{'winner': ...}`` with the value of ``determine_winner``.
    """
    matchup = away_team.get_team_id() + " @ " + home_team.get_team_id()
    start_message = ("Starting game " + str(game_num) + " simulation: " +
                     matchup)
    driver_logger.log(start_message)
    start_time = time.time()
    logger.log(start_message)

    game = Game(away_team.get_team_id(), home_team.get_team_id())
    league = League(home_team.get_team_id(), home_team.get_year())

    lineup_time = time.time()
    logger.log("\tCreating lineups")
    away_pitcher = get_starting_pitcher(away_team.get_team_id(), away_year,
                                        game_num)
    home_pitcher = get_starting_pitcher(home_team.get_team_id(), home_year,
                                        game_num)
    away_team.set_lineup(away_pitcher, home_pitcher, use_dh=league.get_rules())
    home_team.set_lineup(home_pitcher, away_pitcher, use_dh=league.get_rules())
    logger.log("\t\t" + time_converter(time.time() - lineup_time))

    # Keep playing while regulation is unfinished or the score is level.
    while (game.get_inning() <= 9
           or game.get_away_score() == game.get_home_score()):
        batting_orders = {
            'top': away_team.get_batting_order(),
            'bottom': home_team.get_batting_order(),
        }
        lineup_places = {
            'top': away_team.get_lineup_place(),
            'bottom': home_team.get_lineup_place(),
        }
        # The home pitcher throws in the top half, the away pitcher in the
        # bottom half.
        pitchers = {
            'top': home_team.get_pitcher(),
            'bottom': away_team.get_pitcher(),
        }
        inning_data = simulate_inning(
            game, away_team_info, home_team_info, away_year_info,
            home_year_info, batting_orders, lineup_places, pitchers,
            strike_zone, logger)
        top_half = inning_data['top']
        bottom_half = inning_data['bottom']
        away_team.set_batting_order(top_half['lineup'])
        home_team.set_batting_order(bottom_half['lineup'])
        away_team.set_lineup_place(top_half['place'])
        home_team.set_lineup_place(bottom_half['place'])
        game.increment_away_score(top_half['runs'])
        game.increment_home_score(bottom_half['runs'])

    game_data = {'winner': determine_winner(game, away_team, home_team)}
    total_time = time_converter(time.time() - start_time)
    done_message = ("Done simulating game: " + matchup + ": Time = " +
                    total_time)
    logger.log(done_message + '\n\n')
    driver_logger.log("\t" + done_message)
    return game_data
コード例 #9
0
def process_manager_tendencies(year):
    """Extract the season's tendency stats from the downloaded manager pages.

    For every entry of the module-level ``pages`` mapping the "Managerial
    Tendencies" table is located in the page text, the row of ``year`` is
    searched for, and each tracked stat found in that row is stored in the
    module-level ``stats`` mapping under the same key.  An entry whose page
    cannot be parsed keeps an empty dict.

    Args:
        year: Season whose row is extracted from each table.
    """
    start_time = time.time()
    logger.log('\tProcessing manager tendencies')
    global pages
    global stats
    # Loop invariants, built once instead of once per manager / per row.
    stats_to_consider = (
        'steal_2b_chances', 'steal_2b_attempts', 'steal_3b_chances',
        'steal_3b_attempts', 'sac_bunt_chances', 'sac_bunts',
        'ibb_chances', 'ibb', 'pinch_hitters', 'pinch_runners',
        'pitchers_used_per_game'
    )
    year_text = str(year)
    for manager_team, tendencies in pages.items():
        team_stats = stats[manager_team] = {}
        try:
            rows = str(tendencies).split('<h2>Managerial Tendencies</h2>'
                                         )[1].split('tbody>')[1].split('<tr>')
        except IndexError:
            # Page has no tendencies table; leave the entry empty.
            continue
        for row in rows:
            try:
                if row.split('.shtml">')[1].split('</a>')[0] != year_text:
                    continue
                for stat in stats_to_consider:
                    # None of the stat names can straddle a '<td' boundary,
                    # so testing the whole row equals testing each cell.
                    if stat in row:
                        team_stats[stat] = row.split(
                            'data-stat="' + stat +
                            '">')[1].split('</td>')[0]
                break  # the season's row has been handled
            except IndexError:
                # Row without a year link or with an unexpected cell layout:
                # move on to the next row.
                continue
    logger.log('\t\tTime = ' + time_converter(time.time() - start_time))
コード例 #10
0
ファイル: inning.py プロジェクト: Engy-22/BaseballSync
def simulate_inning(game, away_team_info, home_team_info, away_year_info, home_year_info, lineup, place, pitcher,
                    strike_zone, driver_logger):
    """Simulate both halves of the current inning and advance the game.

    The away team bats in the top half and the home team in the bottom half.
    Each half is run through ``simulate_half_inning`` and its key/value pairs
    are copied into the returned dictionary.

    Args:
        game: Game object; supplies the inning number and team names and is
            advanced by one inning.
        away_team_info, home_team_info: Per-team data for each side.
        away_year_info, home_year_info: Per-year data for each side.
        lineup, place, pitcher: Mappings with ``'top'`` and ``'bottom'`` keys.
        strike_zone: Passed through to ``simulate_half_inning``.
        driver_logger: Logger that receives the inning header and timing.

    Returns:
        dict: ``{'top': {...}, 'bottom': {...}}`` with each half's data.
    """
    inning_num = str(game.get_inning())
    driver_logger.log('\tInning ' + inning_num)
    start_time = time.time()
    inning = Inning()
    matchup = game.get_away_team() + " @ " + game.get_home_team() + " - " + inning_num
    logger.log("Starting inning simulation: " + matchup)

    # (half, batting team info, pitching team info,
    #  batting year info -- pitchers' batting stats for a given year,
    #  pitching year info -- league pitch_fx data)
    halves = (
        ('top', away_team_info, home_team_info, away_year_info, home_year_info),
        ('bottom', home_team_info, away_team_info, home_year_info, away_year_info),
    )
    inning_data = {}
    for half, batting_team, pitching_team, batting_year, pitching_year in halves:
        inning.set_half_inning(half)
        half_result = simulate_half_inning(game, batting_team, pitching_team, batting_year, pitching_year, inning,
                                           lineup[half], place[half], strike_zone, pitcher[half])
        # Copy the half inning's data into the inning data dictionary.
        inning_data[half] = {}
        inning_data[half].update(half_result.items())

    game.increment_inning()
    total_time = time_converter(time.time() - start_time)
    driver_logger.log('\t\tTime = ' + total_time)
    logger.log("Done simulating inning: " + matchup + ": Time = " + total_time + '\n\n')
    return inning_data
コード例 #11
0
def all_star_finder(year, normal, driver_logger):
    """Scrape the season's all stars and hand them to ``write_to_file``.

    When ``normal`` is true the all-star game page is read and its second
    and third ``<table>`` segments are parsed (NL first, then AL).
    Otherwise the NL and AL "other leaders" pages are read and the
    "League All-Stars" sections are parsed; for 1945 only the first section
    of each page is used.

    Args:
        year: Season to look up.
        normal: Selects which of the two page layouts is scraped.
        driver_logger: Logger that receives the header and timing lines.
    """
    driver_logger.log("\tFinding " + str(year) + " all stars")
    start_time = time.time()
    logger.log("Finding All Stars || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))

    def page_markup(url):
        # Whole page re-serialised as text by BeautifulSoup.
        return str(BeautifulSoup(urlopen(url), 'html.parser'))

    all_stars = []
    if normal:
        page = page_markup('https://www.baseball-reference.com/allstar/' +
                           str(year) + '-allstar-game.shtml')
        nl_table = page.split('<table>')[2].split('</table>')[0]
        al_table = page.split('<table>')[1].split('</table>')[0]
        for league_table in (nl_table, al_table):
            all_stars += get_all_stars(league_table, '<tr class="">')
    else:
        # 1945 is treated as having a single "League All-Stars" section.
        section_numbers = (1,) if year == 1945 else (1, 2)
        for league in ('NL', 'AL'):
            page = page_markup('https://www.baseball-reference.com/leagues/' +
                               league + '/' + str(year) + '-other-leaders.shtml')
            for section_number in section_numbers:
                section = page.split('<h2>League All-Stars</h2>')[section_number]
                table_body = section.split('<tbody>')[1].split('</tbody')[0]
                all_stars += get_all_stars(table_body, '<tr >')
    write_to_file(year, all_stars)

    total_time = time_converter(time.time() - start_time)
    logger.log("All star finder complete: time = " + total_time)
    driver_logger.log("\t\tTime = " + total_time)
コード例 #12
0
ファイル: mvp_cy_young.py プロジェクト: Engy-22/BaseballSync
def mvp_cy_young(year, driver_logger):
    """Scrape the MVP and Cy Young winners of one season.

    Reads the awards page, finds the table rows whose year cell equals
    ``str(year)`` and, for every award marker present in such a row, takes
    the id from the last player link that precedes the marker.

    Args:
        year: Season to look up.
        driver_logger: Logger that receives the header and timing lines.

    Returns:
        dict: Subset of the keys ``'nl_mvp'``, ``'al_mvp'``, ``'nl_cyYoung'``
        and ``'al_cyYoung'`` mapped to player ids; a key is absent when its
        marker is not found.
    """
    driver_logger.log("\tFinding " + str(year) + " MVPs and Cy Youngs")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Finding " + str(year) + " MVPs and Cy Youngs || Timestamp: " + timestamp)

    page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/mvp_cya.shtml'), 'html.parser'))
    rows = page.split('</tr></thead>')[1].split('</table>')[0].split('<tr valign=')

    # (marker that follows the winner's link in the row, result key)
    award_markers = (
        ('NLmvp', 'nl_mvp'),
        ('ALmvp', 'al_mvp'),
        ('NLcya', 'nl_cyYoung'),
        ('ALcya', 'al_cyYoung'),
    )
    wanted_year = str(year)
    award_winners = {}
    for row in rows:
        try:
            row_year = row.split('"top"><td>')[1].split('</td>')[0]
        except IndexError:
            continue  # fragment without a year cell
        if row_year != wanted_year:
            continue
        for marker, key in award_markers:
            if marker in row:
                last_player_link = row.split(marker)[0].split('<a href="/players/')[-1]
                award_winners[key] = last_player_link.split('/')[1].split('.shtml"')[0]

    total_time = time_converter(time.time() - start_time)
    logger.log("MVP and Cy Young finder complete: time = " + total_time)
    driver_logger.log("\t\tTime = " + total_time)
    return award_winners
コード例 #13
0
def main(from_server, begin_year, end_year, frame=None):
    """Run the yearly driver for ``begin_year`` up to (not including) ``end_year``.

    The range is accepted only when ``end_year > begin_year >= 1876``;
    otherwise a message is printed.  For a valid range the league and manager
    tables are built, ``driver`` runs once per season, and the post-processing
    steps run with the last processed season.  The function always ends by
    calling ``exit()``.

    Args:
        from_server: When false, ``frame.withdraw()`` is called first.
        begin_year: First season to process.
        end_year: Season at which processing stops (exclusive).
        frame: Object whose ``withdraw`` method is called when not running
            from the server.
    """
    print('\n')
    if not (end_year > begin_year >= 1876):
        print(
            'Begin year must be lower than End year, but cannot be lower than 1876.'
        )
    else:
        timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        driver_logger.log('Begin Yearly Driver || Timestamp: ' + timestamp)
        start_time = time.time()
        if not from_server:
            frame.withdraw()
        league_table_constructor()
        manager_table_constructor()

        years = list(range(begin_year, end_year, 1))
        for season in years:
            driver(season)

        last_year = years[-1]
        create_strike_zone()
        rank_driver(last_year)
        comparisons_driver(last_year)
        hof_finder()
        clean_up_deadlocked_file()
        auto_migrate()
        driver_logger.log('Driver complete for year' + stringify_list(years) +
                          ': time = ' +
                          time_converter(time.time() - start_time) + '\n\n\n')
    exit()
コード例 #14
0
ファイル: driver_daily.py プロジェクト: Engy-22/BaseballSync
def main(from_server, day, month, year, frame=None):
    """Run the daily driver for one date, send the results and exit.

    The date is accepted when ``0 < day <= 31``, ``0 < month <= 12`` and
    ``year >= 1876``; otherwise a message is printed.  Any exception raised
    while the driver runs is logged, the results are sent and the exception
    is re-raised.  On the other paths the results are sent and ``exit()`` is
    called.

    Args:
        from_server: When false, ``frame.withdraw()`` is called first.
        day, month, year: Date to process.
        frame: Object whose ``withdraw`` method is called when not running
            from the server.
    """
    print('\n')
    date_accepted = 0 < day <= 31 and 0 < month <= 12 and year >= 1876
    if not date_accepted:
        print('Must enter a valid date.')
    else:
        try:
            timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
            driver_logger.log('Begin Daily Driver || Timestamp: ' + timestamp)
            start_time = time.time()
            if not from_server:
                frame.withdraw()
            league_table_constructor()
            manager_table_constructor()
            driver(day, month, year)
            create_strike_zone()
            clean_up_deadlocked_file()
            auto_migrate()
            elapsed = time_converter(time.time()-start_time)
            driver_logger.log('Driver complete for year ' + str(year) + ': time = ' + elapsed + '\n')
        except Exception as e:
            # Report the failure before letting it propagate.
            driver_logger.log("ERROR:\t" + str(e))
            send_results()
            raise e
    send_results()
    exit()
コード例 #15
0
ファイル: driver_daily.py プロジェクト: Engy-22/BaseballSync
def driver(day, month, year):
    """Run every import step of the daily pipeline for one date.

    All steps receive only ``year`` except ``get_pitch_fx_data``, which also
    receives the month and day.  The steps run strictly in the listed order.

    Args:
        day, month, year: Date being processed.
    """
    date_label = str(month) + '/' + str(day) + '/' + str(year)
    driver_logger.log(date_label)
    driver_time = time.time()
    print('\n\n' + date_label)

    steps_before_pitch_fx = (
        populate_teams_table,
        get_year_data,
        ballpark_and_manager_data,
        league_standings,
        team_offensive_statistics,
        team_defensive_statistics,
        batting_constructor,
        pitching_constructor,
        fielding_constructor,
        team_fielding_file_constructor,
        team_pitching_rotation_constructor,
        team_batting_order_constructor,
        primary_and_secondary_positions,
        determine_pitcher_roles_year,
    )
    for step in steps_before_pitch_fx:
        step(year)

    # The only step that works on the single day rather than the season.
    get_pitch_fx_data(year, month, day)

    steps_after_pitch_fx = (
        hitter_tendencies,
        pitcher_tendencies,
        manager_tendencies,
        hitter_spray_chart_constructor,
        pitcher_spray_chart_constructor,
        team_certainties,
        consolidate_data,
    )
    for step in steps_after_pitch_fx:
        step(year)

    elapsed = time_converter(time.time()-driver_time)
    driver_logger.log('Time taken to download ' + date_label + ' data: ' + elapsed + '\n')
コード例 #16
0
def team_ranker_ovr(data, greater_than, field, all_time_rpg, standard_deviation, average_deviation, playoff_data=None):
    """Compute all-time rank scores for every team season and store them.

    Each entry of ``data[year]`` is read as ``[team_id, value]`` and turned
    into ``[team_id, score]``.  With ``ratio`` being
    ``standard_deviation[str(year)] / average_deviation``:

    * ``field == "offRank_ovr"``: ``(value / all_time_rpg) / ratio``
    * ``field == "ovrRank_ovr"``: ``(value / ratio) * playoff_bump``
    * any other field: ``(value / all_time_rpg) * ratio``

    The scores are handed to ``write_to_file`` with ``greater_than`` and
    ``field``.

    Args:
        data: Mapping of year to a list of ``[team_id, value]`` entries.
        greater_than: Passed through to ``write_to_file``.
        field: Name of the rank column; selects the formula above.
        all_time_rpg: Divisor for the offensive/defensive scores.
        standard_deviation: Mapping of ``str(year)`` to that year's deviation.
        average_deviation: Mean of the per-year deviations.
        playoff_data: Optional mapping used only for ``"ovrRank_ovr"``.  Each
            item whose value equals a team id adds 0.005 to that team's bump,
            plus another 0.005 when the item's key is ``'ws_champ'``.
            ``None`` means no bump for anyone.
    """
    logger = Logger(os.path.join(log_prefix, "import_data", "team_ranker_ovr.log"))
    logger.log("Calculating overall team ranks: " + field)
    start_time = time.time()
    if playoff_data is None:
        # Previously the default crashed on ``None.items()`` for ovrRank_ovr.
        playoff_data = {}
    final_data = {}
    for year, value in data.items():
        year_scores = final_data[year] = []
        for ent in value:
            if field == "offRank_ovr":
                score = (ent[1]/all_time_rpg) / (standard_deviation[str(year)]/average_deviation)
            elif field != "ovrRank_ovr":
                score = (ent[1]/all_time_rpg) * (standard_deviation[str(year)]/average_deviation)
            else:
                # NOTE(review): the bump is matched on team id only, across
                # every item of playoff_data, not just the year being scored.
                # Confirm with the caller that this is intended.
                playoff_bump = 1.0
                for accomplishment, team_id in playoff_data.items():
                    if team_id == ent[0]:
                        if accomplishment == 'ws_champ':
                            playoff_bump += 0.005
                        playoff_bump += 0.005
                # The former inner scan of data[year] only re-found ``ent``
                # itself, so each entry is scored directly (this assumes team
                # ids are unique within a year).
                score = (ent[1]/(standard_deviation[str(year)]/average_deviation)) * playoff_bump
            year_scores.append([ent[0], score])
    write_to_file(final_data, greater_than, field)
    total_time = time_converter(time.time() - start_time)
    logger.log("\tTime = " + total_time + '\n\n')
コード例 #17
0
ファイル: moy_gatherer.py プロジェクト: Engy-22/BaseballSync
def moy_gatherer(year, driver_logger):
    """Scrape the managers of the year of one season.

    Reads the awards page, finds the rows whose year cell equals
    ``str(year)`` and, for each league marker, takes the id from the last
    manager link that precedes the marker in the row.

    NOTE(review): unlike the sibling award scrapers, the markers are not
    checked for presence first; when a marker is missing the split returns
    the whole row and the row's last manager link is used.

    Args:
        year: Season to look up.
        driver_logger: Logger that receives the header and timing lines.

    Returns:
        dict: ``'NL_moy'`` and ``'AL_moy'`` mapped to manager ids, or an
        empty dict when no row matches the year.
    """
    driver_logger.log("\tFinding " + str(year) + " managers of the year")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Finding " + str(year) + " managers of the year || Timestamp: " + timestamp)

    page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/manage.shtml'), 'html.parser'))
    table = page.split('</thead></table></div></div></div></div></div></body></html>')[1].split('<!--')[0]

    wanted_year = str(year)
    award_winners = {}
    for row in table.split('<tr>'):
        try:
            row_year = row.split('<td valign="top">')[1].split('<')[0]
        except IndexError:
            continue  # fragment without a year cell
        if row_year != wanted_year:
            continue
        for marker, key in (('NLmoy', 'NL_moy'), ('ALmoy', 'AL_moy')):
            before_marker = row.split(marker)[0]
            award_winners[key] = before_marker.split(
                '<a href="/managers/')[-1].split('.shtml')[0]

    total_time = time_converter(time.time() - start_time)
    logger.log("Manager of the year finder complete: time = " + total_time)
    driver_logger.log("\t\tTime = " + total_time)
    return award_winners
コード例 #18
0
def roy_gatherer(year, driver_logger):
    """Scrape the rookies of the year of one season.

    Reads the awards page, finds the rows whose year cell equals
    ``str(year)`` and, for every league marker present in such a row, takes
    the id from the last player link that precedes the marker.  A row that
    cannot be parsed is skipped, keeping whatever was extracted before the
    failure.

    Args:
        year: Season to look up.
        driver_logger: Logger that receives the header and timing lines.

    Returns:
        dict: Subset of ``'nl_roy'`` and ``'al_roy'`` mapped to player ids.
    """
    driver_logger.log("\tFinding " + str(year) + " Rookies of the year")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Finding " + str(year) + " Rookies of the year || Timestamp: " + timestamp)

    page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/roy_rol.shtml'), 'html.parser'))
    table = page.split('</tr></thead>')[1].split('</table>')[0]

    wanted_year = str(year)
    award_winners = {}
    for row in table.split('<tr '):
        try:
            if row.split('valign="top"><td>')[1].split('</td>')[0] != wanted_year:
                continue
            for marker, key in (('NLroy', 'nl_roy'), ('ALroy', 'al_roy')):
                if marker in row:
                    last_player_link = row.split(marker)[0].split('<a href="/players/')[-1]
                    award_winners[key] = last_player_link.split('/')[1].split('.shtml"')[0]
        except IndexError:
            continue

    total_time = time_converter(time.time() - start_time)
    logger.log("Rookie of the year finder complete: time = " + total_time)
    driver_logger.log("\t\tTime = " + total_time)
    return award_winners
コード例 #19
0
def team_certainties(year):
    """Compute and store the statistic certainty of every team of a season.

    For each stat type (batting, then pitching) and each team of ``year``,
    the "uncertain" plate appearances ``pa - pa * certainty`` of the team's
    players are summed, and ``(team_pa - uncertain_pa) / team_pa`` is written
    to ``team_years.certainty``.

    NOTE(review): both stat types write the same ``certainty`` column, so the
    pitching pass overwrites the batting pass.  Confirm this is intended.

    Args:
        year: Season whose teams are processed.
    """
    print('aggregating team statistic certainties')
    driver_logger.log("\tAggregating team statistic certainties")
    start_time = time.time()
    logger.log("Calculating team certainties || Timestamp: " + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    db = DatabaseConnection(sandbox_mode)
    try:
        # The team list does not depend on the stat type, so read it once.
        ty_uids = db.read('select ty_uniqueidentifier, teamid from team_years where year = ' + str(year))
        for stat_type in ("batting", "pitching"):
            for ty_uid in ty_uids:
                pau = 0  # plate appearances not covered by certainty
                player_list = list(db.read('select playerid from player_positions where ty_uniqueidentifier = '
                                           + str(ty_uid[0]) + ';'))
                for player in player_list:
                    pt_uid = db.read('select pt_uniqueidentifier from player_teams where playerid = "' + player[0]
                                     + '" and teamid = "' + ty_uid[1] + '";')[0][0]
                    try:
                        ent = db.read('select pa, certainty from player_' + stat_type + ' where year = ' + str(year)
                                      + ' and pt_uniqueidentifier = ' + str(pt_uid) + ';')
                        pau += int(ent[0][0]) - (int(ent[0][0]) * float(ent[0][1]))
                    except (IndexError, TypeError):
                        # No row of this stat type for the player, or a NULL
                        # pa/certainty value: the player contributes nothing.
                        continue
                pa = int(db.read('select pa from team_years where ty_uniqueidentifier = ' + str(ty_uid[0])
                                 + ';')[0][0])
                if pa == 0:
                    # A ratio cannot be computed without plate appearances;
                    # leave the stored certainty untouched.
                    logger.log('\tNo plate appearances for team year ' + str(ty_uid[0])
                               + '; certainty not updated')
                    continue
                db.write('update team_years set certainty = ' + str((pa - pau) / pa)
                         + ' where ty_uniqueidentifier = ' + str(ty_uid[0]) + ';')
    finally:
        # Release the connection even when a query or conversion fails.
        db.close()
    total_time = time_converter(time.time() - start_time)
    logger.log("Done calculating team certainties: time = " + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #20
0
def hof_finder():
    """Scrape Baseball-Reference's Hall of Fame page and store induction years.

    The first ``<tbody>`` of the page is split into ``<tr>`` rows. For every
    row the person id (``data-append-csv`` attribute), the induction year
    (taken from the ``/awards/hof_<year>.shtml`` link) and the induction
    category (``category_hof`` cell) are extracted. Rows whose category is
    ``Player`` update ``players.HOF``; rows whose category is ``Manager``
    update ``managers.HOF``; every other category is ignored.

    Raises:
        IndexError: If the page or a row does not contain the expected markup.
            The database connection is still closed in that case.
    """
    print("adding HOF data")
    driver_logger.log("\tAdding HOF data")
    start_time = time.time()
    logger.log("Begin finding hall of famers || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    hof_table = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/hof.shtml'), 'html.parser')).\
        split('<tbody>')[1].split('</tbody>')[0]
    # Element 0 is whatever precedes the first <tr>, so it is dropped.
    rows = hof_table.split('<tr>')[1:]
    db = DatabaseConnection(sandbox_mode)
    try:
        for row in rows:
            person = row.split('data-append-csv="')[1].split('"')[0]
            year = row.split('<a href="/awards/hof_')[1].split('.shtml')[0]
            induction_type = row.split('data-stat="category_hof">')[1].split(
                '<')[0]
            if induction_type == 'Player':
                db.write('update players set HOF = ' + str(year) +
                         ' where playerId = "' + person + '";')
            elif induction_type == 'Manager':
                db.write('update managers set HOF = ' + str(year) +
                         ' where managerId = "' + person + '";')
    finally:
        # A malformed row raises IndexError above; make sure the connection
        # is released even then.
        db.close()
    total_time = time_converter(time.time() - start_time)
    logger.log("Done finding hall of famers: time = " + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #21
0
def get_day_data(day, month, year):
    """Download and parse the per-inning data of every game listed for one day.

    The day's index page on gd2.mlb.com is fetched and split on ``<li>``. For
    each entry that links to a ``gid...`` game directory, the game's inning
    files and ``players.xml`` are downloaded, the innings are parsed with
    ``parse_innings`` and the temporary XML files are removed with
    ``clear_xmls``.

    Args:
        day: Day of the month as a string; a single character is zero-padded.
        month: Month as a string; a single character is zero-padded.
        year: Year as a string (it is concatenated into the URL and log text).
    """
    if len(day) == 1:
        day = '0' + day
    if len(month) == 1:
        month = '0' + month
    logger.log("\tDownloading data for " + month + '-' + day + '-' + year)
    day_time = time.time()
    home_page_url = 'http://gd2.mlb.com/components/game/mlb/year_' + year + '/month_' + month + '/day_' + day
    home_page = str(BeautifulSoup(urlopen(home_page_url),
                                  'html.parser')).split('<li>')
    for line in home_page:
        try:
            # Entries without a '"day_DD/' link raise IndexError here and are
            # handled by the outer except below.
            if str(line.split('"day_' + str(day) + '/')[1])[:3] == 'gid':
                # home_page_url[:-6] drops the last six characters (the
                # trailing 'day_DD' for a two-character day); the href that is
                # appended starts with 'day_DD/' itself.
                if not get_data_from_this_game(home_page_url[:-6] + line.split(
                        '<a href="')[1].split('">')[0] + 'game.xml'):
                    continue
                # Reset the module-level inning store before loading this game.
                global innings
                innings = {}
                innings_url = home_page_url[:-6] + line.split(
                    '<a href="')[1].split('">')[0] + 'inning/'
                players_url = home_page_url[:-6] + line.split(
                    '<a href="')[1].split('">')[0] + 'players.xml'
                # Fields 3 and 4 of the gid are logged; presumably the two
                # team codes -- verify against the gid format.
                logger.log("\t\tDownloading data for game: " +
                           line.split('gid_')[1].split('_')[3] + '_' +
                           line.split('gid_')[1].split('_')[4] + ' - ' +
                           innings_url)
                try:
                    innings_page = str(
                        BeautifulSoup(urlopen(innings_url),
                                      'html.parser')).split('<li>')
                    urlretrieve(
                        players_url,
                        os.path.join("..", "..", "baseball-sync", "src",
                                     "import_data", "player_data", "pitch_fx",
                                     "xml", "players.xml"))
                except Exception:
                    # If either download fails, no inning files are queued
                    # (even when the inning listing itself was fetched).
                    innings_page = []
                # Leaving the with-block waits for all submitted load_xml
                # calls before the innings are parsed.
                with ThreadPoolExecutor(os.cpu_count()) as executor:
                    for inning in innings_page:
                        try:
                            # Only links named 'inning_<digits>.xml' are
                            # loaded; other entries are skipped.
                            if inning.split('<a href="inning_')[1].split(
                                    '.')[0].isdigit():
                                individual_inning_url = inning.split(
                                    '.xml"> ')[1].split('</a>')[0]
                                executor.submit(
                                    load_xml,
                                    innings_url + individual_inning_url,
                                    individual_inning_url.split('_')[1].split(
                                        '.xml')[0])
                        except IndexError:
                            continue
                parse_innings(year, month, day, innings_url)
                clear_xmls()
        except (IndexError, KeyError):
            # Reached for non-game entries as well as for failures while
            # handling a game; the temporary XML files are cleared either way.
            clear_xmls()
            continue
    logger.log("\tDone downloading data for " + month + '-' + day + '-' +
               year + ": time = " + time_converter(time.time() - day_time) +
               '\n\n')
コード例 #22
0
def catcher_defense(year, logger):
    """Download and parse the league-wide catcher fielding table for a season.

    Args:
        year: Season to fetch; inserted into the Baseball-Reference URL.
        logger: Object whose ``log(str)`` method receives progress messages.

    Returns:
        Whatever ``parse_table`` returns for the table's ``<tr `` chunks.
    """
    logger.log('\tDownloading catcher data')
    began = time.time()
    url = 'https://www.baseball-reference.com/leagues/MLB/' + str(year) + '-specialpos_c-fielding.shtml'
    html = str(BeautifulSoup(urlopen(url), 'html.parser'))
    # Narrow the document down to the body of the "Player Fielding - C" table.
    catcher_section = html.split('Player Fielding - C</h2>')[1]
    table_body = catcher_section.split('<tbody>')[1].split('</tbody>')[0]
    data = parse_table(table_body.split('<tr '))
    logger.log('\t\tTime = ' + time_converter(time.time() - began))
    return data
コード例 #23
0
def primary_and_secondary_positions(year):
    """Determine and write primary/secondary positions for a season's players.

    Position rows are collected for the teams of ``year`` and, separately, for
    the teams of the 26-season window ``year - 25`` .. ``year``. Every player
    row of ``year`` is then resolved against the window data with
    ``get_player_positions`` and ``determine_primary_position``, and the result
    is handed to ``write_to_file``.

    Args:
        year: Season to process (an integer; ``year - 25`` is computed).
    """
    print("adding primary and secondary positions")
    driver_logger.log("\tAdding primary and secondary positions")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) + " primary and secondary data || Timestamp: " + timestamp)
    db = DatabaseConnection(sandbox_mode)

    logger.log("\tAssembling list of players")
    assembly_time = time.time()
    season_teams = db.read("select TY_uniqueidentifier from team_years where year=" + str(year) + ';')
    window_teams = db.read("select TY_uniqueidentifier from team_years where year between " + str(year - 25)
                           + ' and ' + str(year) + ';')
    positions_query = 'select playerId, positions from player_positions where TY_uniqueidentifier = '
    season_positions = []
    for season_team in season_teams:
        season_positions.extend(db.read(positions_query + str(season_team[0]) + ';'))
    window_positions = []
    for window_team in window_teams:
        window_positions.extend(db.read(positions_query + str(window_team[0]) + ';'))
    logger.log("\t\tTime = " + time_converter(time.time() - assembly_time))

    logger.log("\tDetermining positions")
    determination_time = time.time()
    for player in season_positions:
        position_string = get_player_positions(player, window_positions)
        positions_dict = determine_primary_position(position_string)
        # NOTE: "\'" is the same one-character string as "'", so this replace
        # leaves the player id unchanged.
        write_to_file(player[0].replace("'", "\'"), positions_dict)
    db.close()
    logger.log("\t\tTime = " + time_converter(time.time() - determination_time))

    total_time = time_converter(time.time() - start_time)
    logger.log("Done downloading primary and secondary positions: time = " + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #24
0
def manager_table_constructor():
    """Scrape the all-time manager list and queue one record per manager.

    The Baseball-Reference managers page is split on ``<tr``. For every row
    holding a ``csk`` name cell, the manager id, last name, first name, wins
    and losses are extracted and a ``"id","last","first",wins,losses`` line is
    submitted to ``write_to_file`` on a thread pool. The ``managerId`` index is
    dropped before the load and re-added afterwards.

    NOTE(review): both connections are opened with ``sandbox_mode=True``
    hard-coded; confirm that this is intended.
    """
    driver_logger.log('\tGathering manager data (all-time)')
    print("Gathering manager data (all-time)")
    start_time = time.time()
    logger.log('Begin populating teams table || Timestamp: ' +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    table = str(
        bs(
            urllib.request.urlopen(
                'https://www.baseball-reference.com/managers/'),
            'html.parser'))
    rows = table.split('<tr')
    db = DatabaseConnection(sandbox_mode=True)
    try:
        db.write('ALTER TABLE managers DROP INDEX managerId;')
    finally:
        db.close()
    try:
        with ThreadPoolExecutor(os.cpu_count()) as executor:
            for row in rows:
                if '<td class="left" csk="' not in row:
                    continue
                this_row = row.split('</tr>')[0]
                try:
                    # NOTE: "\'" is the same string as "'", so the replace
                    # calls below leave the values unchanged.
                    manager_id = this_row.split(
                        '<a href="/managers/')[1].split('.shtml')[0].replace(
                            "'", "\'")
                    last_first = this_row.split(
                        '<td class="left" csk="')[1].split('"')[0]
                    last = last_first.split(',')[0].replace("'", "\'")
                    first = last_first.split(',')[1].replace("'", "\'")
                    wins = this_row.split('data-stat="W">')[1].split('<')[0]
                    loses = this_row.split('data-stat="L">')[1].split('<')[0]
                except (AttributeError, IndexError):
                    # A row lacking one of the expected cells fails with
                    # IndexError on the [1] lookups; skip it rather than
                    # aborting the whole load.
                    continue
                executor.submit(
                    write_to_file, '"' + manager_id + '","' + last +
                    '","' + first + '",' + wins + ',' + loses)
    finally:
        # Always restore the index, even if the load above failed part-way.
        db = DatabaseConnection(sandbox_mode=True)
        try:
            db.write('ALTER TABLE managers ADD INDEX(managerId);')
        finally:
            db.close()
    total_time = time.time() - start_time
    logger.log('Constructing manager table completed: time = ' +
               time_converter(total_time))
    driver_logger.log('\t\tTime = ' + time_converter(total_time))
コード例 #25
0
def team_batting_order_constructor(year):
    """Download every team page of a season and organize the batting orders.

    Nothing is downloaded for seasons before 1908. Otherwise the first line of
    ``yearTeams.txt`` that contains ``str(year)`` is read; each comma-separated
    ``a;b`` entry on it that does not contain ``TOT`` is passed to ``load_url``
    on a thread pool, and ``get_hitters(year)`` then organizes the data.

    Args:
        year: Season to process (an integer; compared against 1908).

    Raises:
        FileNotFoundError: If ``yearTeams.txt`` is found in neither of the two
            relative locations tried.
    """
    if year < 1908:
        logger.log("\tNo team batting order data to download before 1908.")
        driver_logger.log(
            "\tNo team batting order data to download before 1908.")
        return
    print("getting team batting order data")
    driver_logger.log("\tGetting team batting order data")
    start_time = time.time()
    # Reset the module-level page cache before the downloads start.
    global pages
    pages = {}
    logger.log("Downloading " + str(year) +
               " team batting order data || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tDownloading team pages")
    try:
        year_file = open(os.path.join("..", "background", "yearTeams.txt"),
                         'r')
    except FileNotFoundError:
        # Fall back to the location relative to a deeper working directory.
        year_file = open(
            os.path.join("..", "..", "..", "background", "yearTeams.txt"), 'r')
    # `with year_file` closes the handle on every path (match found, no match,
    # or an exception); previously it was never closed.
    with year_file, ThreadPoolExecutor(os.cpu_count()) as executor:
        for line in year_file:
            if str(year) in line:
                # The first and last comma-separated fields are not teams.
                for team in line.split(',')[1:-1]:
                    if "TOT" not in team:
                        team_fields = team.split(';')
                        executor.submit(load_url, year, team_fields[0],
                                        team_fields[1])
                break
    logger.log("\t\t\tTime = " + time_converter(time.time() - start_time))
    logger.log("\tOrganizing batting orders")
    write_time = time.time()
    get_hitters(year)
    logger.log("\t\t\tTime = " + time_converter(time.time() - write_time))
    total_time = time_converter(time.time() - start_time)
    logger.log("Done downloading team batting order data: time = " +
               total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #26
0
def pitcher_tendencies(year):
    """Store pitch-tendency statistics for every pitcher of a season.

    For 1988 and later, the "Player Pitching Pitches" table is downloaded from
    Baseball-Reference, each row is converted with ``intermediate`` and the
    per-player results are written with ``write_to_file``, followed by
    ``fill_pitchers_with_0_pa``. For earlier seasons only ``fill_fields`` is
    called.

    Args:
        year: Season to process (an integer; compared against 1988).
    """
    print("storing pitcher tendencies")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) + " pitcher tendencies || Timestamp: " + timestamp)
    if year >= 1988:
        driver_logger.log("\tStoring pitcher tendencies")
        logger.log("\tDownloading data")
        url = 'https://www.baseball-reference.com/leagues/MLB/' + str(year) + '-pitches-pitching.shtml'
        page = str(BeautifulSoup(urlopen(url), 'html.parser'))
        pitches_section = page.split('<h2>Player Pitching Pitches</h2>')[1]
        table = pitches_section.split('<tbody>')[1].split('</tbody>')[0]
        rows = table.split('<tr')
        logger.log("\t\tTime = " + time_converter(time.time() - start_time))

        logger.log("\tFormatting data")
        format_time = time.time()
        stats_by_player = {}
        last_player_id = ""
        for row in rows:
            # The previously seen id is passed along with each row.
            player_id, row_stats = intermediate(row, last_player_id)
            if player_id is None:
                continue
            stats_by_player[player_id] = row_stats
            last_player_id = player_id
        for pitcher_id, pitcher_stats in stats_by_player.items():
            write_to_file(year, pitcher_id, pitcher_stats)
        fill_pitchers_with_0_pa(year)
        # Measured from the start of the formatting phase, not the whole run.
        total_time = time_converter(time.time() - format_time)
        logger.log("\t\tTime = " + total_time)
        driver_logger.log("\t\tTime = " + total_time)
    else:
        driver_logger.log("\tNo pitcher tendency data before 1988")
        logger.log("\tNo pitcher tendency data before 1988")
        fill_fields(year)
    elapsed = time_converter(time.time() - start_time)
    logger.log("Done storing pitcher tendencies: time = " + elapsed + '\n\n')
コード例 #27
0
def team_defensive_statistics(year):
    """Download a season's team pitching tables and pass them to extract_data.

    The "Team Standard Pitching" rows are required; the "Team Batting Against"
    rows are optional and fall back to an empty list when that page cannot be
    fetched or parsed. Each row list is handed to ``extract_data`` together
    with a mapping of page stat names to the names used for storage.

    Args:
        year: Season to fetch; inserted into the Baseball-Reference URLs.
    """
    def team_rows(html, player_heading, team_heading):
        # Keep what precedes the player table, then isolate the team table's
        # body and split it into rows.
        team_section = html.split(player_heading)[0].split(team_heading)[1]
        return team_section.split('<tbody>')[1].split('</tbody>')[0].split('<tr>')

    driver_logger.log("\tGathering team defensive statistics")
    print('Gathering team defensive statistics')
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Downloading team defensive data for ' + str(year) + ' || Timestamp: ' + timestamp)

    league_url = "https://www.baseball-reference.com/leagues/MLB/" + str(year)
    standard_page = str(BeautifulSoup(urlopen(league_url + "-standard-pitching.shtml"), "html.parser"))
    try:
        against_page = str(BeautifulSoup(urlopen(league_url + "-batting-pitching.shtml"), "html.parser"))
        batting_against_rows = team_rows(against_page, 'Player Batting Against', '<h2>Team Batting Against')
    except Exception:
        # The batting-against data is treated as optional.
        batting_against_rows = []
    standard_pitching_rows = team_rows(standard_page, 'Player Standard Pitching', '<h2>Team Standard Pitching')

    standard_stats = {
        'R': 'RA',
        'ER': 'ER',
        'H': "HA",
        'HR': 'HRA',
        'BB': 'BBA',
        'HBP': 'HBPA',
        'IBB': 'IBBA',
        'SO': 'K',
        'ERA': 'ERA',
        'whip': 'WHIP',
    }
    against_stats = {
        'PA': 'PAA',
        'AB': 'ABA',
        '2B': '2BA',
        '3B': '3BA',
        'batting_avg': 'BAA',
        'onbase_perc': 'OBA',
        'slugging_perc': 'SLGA',
        'onbase_plus_slugging': 'OPSA',
        'batting_avg_bip': 'BABIPA',
    }
    extract_data(standard_pitching_rows, standard_stats, year)
    extract_data(batting_against_rows, against_stats, year)

    total_time = time_converter(time.time() - start_time)
    logger.log("Done donwloading team defensive data for " + str(year) + ': time = ' + total_time + '\n\n')
    driver_logger.log('\t\tTime = ' + total_time)
コード例 #28
0
def triple_crown_winners(year, driver_logger):
    """Look up the batting and pitching triple crown winners of a season.

    Args:
        year: Season to look up; forwarded to ``get_winners``.
        driver_logger: Object whose ``log(str)`` method receives the summary
            messages (detail messages go to the module-level ``logger``).

    Returns:
        A ``(hitters, pitchers)`` tuple holding the two ``get_winners`` results.
    """
    def table_after(caption):
        # Text between the caption and the next closing </table> tag.
        return page.split(caption)[1].split('</table>')[0]

    driver_logger.log("\tFinding " + str(year) + " triple crown winners")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Finding " + str(year) + " triple crown winners || Timestamp: " + timestamp)
    page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/triple_crowns.shtml'), 'html.parser'))
    # Both tables are isolated before either one is searched.
    batting_table = table_after('Batting Triple Crowns Table')
    pitching_table = table_after('Pitching Triple Crowns Table')
    winners = (get_winners(year, batting_table, "hitting"),
               get_winners(year, pitching_table, "pitching"))
    total_time = time_converter(time.time() - start_time)
    logger.log("Triple crown finder complete: time = " + total_time)
    driver_logger.log("\t\tTime = " + total_time)
    return winners
コード例 #29
0
def ballpark_and_manager_data(year):
    """Download a season's team pages and gather per-team home numbers.

    The first line of ``yearTeams.txt`` containing ``str(year)`` supplies the
    teams: each ``a;b`` entry (``TOT`` entries excluded) is stored as
    ``teams[b] = a``. ``load_url`` is run for every key on one thread pool, and
    ``gather_team_home_numbers`` for every team on a second one.

    Args:
        year: Season to process.
    """
    driver_logger.log('\tGathering ballpark and manager data')
    print("Gathering ballpark and manager data")
    start_time = time.time()
    # Reset the module-level page cache before the downloads start.
    global pages
    pages = {}
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Beginning ballpark and manager data download for ' + str(year) + ' || Timestamp: ' + timestamp)

    teams = {}
    with open(os.path.join("..", "background", "yearTeams.txt"), 'rt') as file:
        for line in file:
            if str(year) not in line:
                continue
            # The first and last comma-separated fields are not teams.
            for entry in line.split(',')[1:-1]:
                fields = entry.split(';')
                if 'TOT' in fields:
                    continue
                teams[fields[1]] = fields[0]
            break

    logger.log('Begin downloading team pages')
    download_time = time.time()
    with ThreadPoolExecutor(os.cpu_count()) as page_pool:
        for team_key in teams:
            page_pool.submit(load_url, year, team_key)
    logger.log('\tDone downloading team pages: time = ' + time_converter(time.time() - download_time))

    logger.log("Calculating and writing ballpark numbers and downloading images")
    calc_and_download_time = time.time()
    team_count = len(teams)
    with ThreadPoolExecutor(os.cpu_count()) as numbers_pool:
        for team_key, team_id in teams.items():
            numbers_pool.submit(gather_team_home_numbers, team_id, team_key, year, team_count)
    logger.log("\tDone calculating and writing ballpark numbers and downloading manager data: time = "
               + time_converter(time.time() - calc_and_download_time))

    total_time = time_converter(time.time() - start_time)
    logger.log('Ballpark and manager data download completed: time = ' + total_time + '\n\n')
    driver_logger.log('\t\tTime = ' + total_time)
コード例 #30
0
def team_fielding_file_constructor(year):
    """Download every team page of a season and write the fielding positions.

    The first line of ``yearTeams.txt`` that contains ``str(year)`` is read;
    each comma-separated ``a;b`` entry on it (``TOT`` entries excluded) is
    passed to ``load_url`` on a thread pool, and ``write_to_file(year)`` then
    organizes the downloaded data.

    Args:
        year: Season to process.

    Raises:
        FileNotFoundError: If ``yearTeams.txt`` is found in neither of the two
            relative locations tried.
    """
    print('getting team fielding positions')
    driver_logger.log("\tGetting team fielding positions")
    start_time = time.time()
    # Reset the module-level page cache before the downloads start.
    global pages
    pages = {}
    logger.log("Downloading " + str(year) +
               " team fielding positions || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tDownloading team pages")
    try:
        year_file = open(os.path.join("..", "background", "yearTeams.txt"),
                         'r')
    except FileNotFoundError:
        # Fall back to the location relative to a deeper working directory.
        year_file = open(
            os.path.join("..", "..", "..", "background", "yearTeams.txt"), 'r')
    # `with year_file` closes the handle on every path; previously it was only
    # closed when a line for the requested year was found.
    with year_file, ThreadPoolExecutor(os.cpu_count()) as executor:
        for line in year_file:
            if str(year) in line:
                # The first and last comma-separated fields are not teams.
                for team in line.split(',')[1:-1]:
                    split_team = team.split(';')
                    if "TOT" not in split_team:
                        executor.submit(load_url, year, split_team[0],
                                        split_team[1])
                break
    logger.log("\t\tTime = " + time_converter(time.time() - start_time))
    logger.log("\tOrganizing team position data")
    write_time = time.time()
    write_to_file(year)
    logger.log("\t\tTime = " + time_converter(time.time() - write_time))
    total_time = time_converter(time.time() - start_time)
    logger.log("Done downloading team fielding data: time = " + total_time +
               '\n\n')
    driver_logger.log("\t\tTime = " + total_time)