コード例 #1
0
def manager_tendencies(year):
    """Download, process and store manager tendencies for one season.

    Reads every (managerid, teamid) pair tied to ``year``, submits one
    ``load_url`` task per pair to a thread pool, runs
    ``process_manager_tendencies`` and then submits one ``write_to_file``
    task for each non-empty entry of the module-level ``stats`` mapping.

    Args:
        year: Season whose manager tendencies are stored.
    """
    global stats
    driver_logger.log("\tStoring manager tendencies")
    print("storing manager tendencies")
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) + " manager tendencies || Timestamp: " + timestamp)
    logger.log('\tMaking HTTP requests')

    query = ('select manager_teams.managerid, manager_teams.teamid from manager_teams, manager_year where '
             'manager_year.year = ' + str(year) + ' and manager_year.mt_uniqueidentifier = '
             'manager_teams.mt_uniqueidentifier;')
    connection = DatabaseConnection(sandbox_mode)
    manager_rows = connection.read(query)
    connection.close()

    # Leaving the with-block waits for every submitted download to finish.
    with ThreadPoolExecutor(os.cpu_count()) as download_pool:
        for row in manager_rows:
            download_pool.submit(load_url, row[0], row[1])
    logger.log('\t\tTime = ' + time_converter(time.time() - began))

    process_manager_tendencies(year)

    write_began = time.time()
    logger.log('\tWriting data to database')
    with ThreadPoolExecutor(os.cpu_count()) as write_pool:
        for manager_team, tendencies in stats.items():
            if len(tendencies) == 0:
                continue
            write_pool.submit(write_to_file, year, manager_team, tendencies)
    logger.log('\t\tTime = ' + time_converter(time.time() - write_began))

    elapsed = time_converter(time.time() - began)
    driver_logger.log("\t\tTime = " + elapsed)
    logger.log("Done storing manager tendencies: time = " + elapsed + '\n\n')
コード例 #2
0
ファイル: driver_daily.py プロジェクト: Engy-22/BaseballSync
def driver(day, month, year):
    """Run the full daily download pipeline for a single date.

    Every step receives ``year``; only the pitch fx download also receives
    ``month`` and ``day``. The steps run strictly in the order written below.

    Args:
        day: Day of the month being downloaded.
        month: Month being downloaded.
        year: Season being downloaded.
    """
    date_label = '/'.join((str(month), str(day), str(year)))
    driver_logger.log(date_label)
    began = time.time()
    print('\n\n' + date_label)

    # Team, league and per-player statistics.
    populate_teams_table(year)
    get_year_data(year)
    ballpark_and_manager_data(year)
    league_standings(year)
    team_offensive_statistics(year)
    team_defensive_statistics(year)
    batting_constructor(year)
    pitching_constructor(year)
    fielding_constructor(year)

    # Rosters, lineups and positions.
    team_fielding_file_constructor(year)
    team_pitching_rotation_constructor(year)
    team_batting_order_constructor(year)
    primary_and_secondary_positions(year)
    determine_pitcher_roles_year(year)

    # Pitch-level data and the tendencies derived afterwards.
    get_pitch_fx_data(year, month, day)
    hitter_tendencies(year)
    pitcher_tendencies(year)
    manager_tendencies(year)
    hitter_spray_chart_constructor(year)
    pitcher_spray_chart_constructor(year)

    # Final aggregation.
    team_certainties(year)
    consolidate_data(year)

    elapsed = time_converter(time.time() - began)
    driver_logger.log('Time taken to download ' + date_label + ' data: ' + elapsed + '\n')
コード例 #3
0
def hof_finder():
    """Scrape the Hall of Fame page and record induction years.

    Downloads the baseball-reference HOF table, and for every row whose
    induction category is ``Player`` or ``Manager`` writes the induction
    year into the ``HOF`` column of the ``players`` / ``managers`` table.
    Rows of any other category are ignored.

    Raises:
        IndexError: If the page or a row does not contain the expected
            markers. The database connection is still closed in that case.
    """
    print("adding HOF data")
    driver_logger.log("\tAdding HOF data")
    start_time = time.time()
    logger.log("Begin finding hall of famers || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    page = str(BeautifulSoup(urlopen('https://www.baseball-reference.com/awards/hof.shtml'), 'html.parser'))
    hof_table = page.split('<tbody>')[1].split('</tbody>')[0]
    rows = hof_table.split('<tr>')[1:]
    # Induction category -> (table, id column) for the categories that are stored.
    targets = {
        'Player': ('players', 'playerId'),
        'Manager': ('managers', 'managerId'),
    }
    db = DatabaseConnection(sandbox_mode)
    try:
        for row in rows:
            person = row.split('data-append-csv="')[1].split('"')[0]
            year = row.split('<a href="/awards/hof_')[1].split('.shtml')[0]
            induction_type = row.split('data-stat="category_hof">')[1].split('<')[0]
            target = targets.get(induction_type)
            if target is None:
                continue
            table, id_column = target
            # NOTE(review): `person` and `year` come from scraped HTML and are
            # interpolated straight into SQL; switch to a parameterised write
            # if DatabaseConnection offers one.
            db.write('update ' + table + ' set HOF = ' + str(year) +
                     ' where ' + id_column + ' = "' + person + '";')
    finally:
        # A malformed row raises above; make sure the connection is released.
        db.close()
    total_time = time_converter(time.time() - start_time)
    logger.log("Done finding hall of famers: time = " + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #4
0
def main(from_server, begin_year, end_year, frame=None):
    """Run the yearly driver for every season in ``[begin_year, end_year)``.

    After the per-year downloads it builds the strike zone, runs the rank and
    comparison drivers for the last processed year, adds Hall of Fame data,
    replays deadlocked statements and migrates the data. The function always
    ends by calling ``exit()``.

    Args:
        from_server: When falsy and ``frame`` is given, ``frame.withdraw()``
            is called before work starts.
        begin_year: First season to process; must be at least 1876.
        end_year: Exclusive upper bound; must be greater than ``begin_year``.
        frame: Optional object exposing ``withdraw()`` (presumably a GUI
            window; confirm against the caller).
    """
    print('\n')
    if end_year > begin_year >= 1876:
        driver_logger.log(
            'Begin Yearly Driver || Timestamp: ' +
            datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
        start_time = time.time()
        # `frame` defaults to None, so only hide it when one was supplied.
        if not from_server and frame is not None:
            frame.withdraw()
        league_table_constructor()
        manager_table_constructor()
        # Non-empty because end_year > begin_year was checked above.
        years = list(range(begin_year, end_year))
        for year in years:
            driver(year)
        create_strike_zone()
        rank_driver(years[-1])
        comparisons_driver(years[-1])
        hof_finder()
        clean_up_deadlocked_file()
        auto_migrate()
        driver_logger.log('Driver complete for year' + stringify_list(years) +
                          ': time = ' +
                          time_converter(time.time() - start_time) + '\n\n\n')
    else:
        print(
            'Begin year must be lower than End year, but cannot be lower than 1876.'
        )
    exit()
コード例 #5
0
ファイル: driver.py プロジェクト: Engy-22/BaseballSync
def consolidate_data(year):
    """Consolidate roster information for every team of one season.

    For each ``ty_uniqueidentifier`` of ``year`` the hitter spots, player
    positions and batting / pitching / fielding statistics are gathered and
    handed to ``write_roster_info``.

    Args:
        year: Season whose team data is consolidated.
    """
    driver_logger.log("\tConsolidating data")
    print("Consolidating data")
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Consolidating team data || Timestamp: " + timestamp)

    db = DatabaseConnection(sandbox_mode)
    team_year_rows = db.read('select ty_uniqueidentifier from team_years where year = ' + str(year) + ';')
    for row in team_year_rows:
        ty_uid = row[0]
        team_began = time.time()
        team_id = db.read('select teamId from team_years where ty_uniqueidentifier = ' + str(ty_uid) + ';')[0][0]
        logger.log('\t' + team_id)

        # Gathered in the same order as the keys are listed.
        roster_info = {}
        roster_info['hitter_spots'] = consolidate_hitter_spots(ty_uid)
        roster_info['player_positions'] = consolidate_player_positions(ty_uid)
        roster_info['batter_stats'] = consolidate_player_stats(ty_uid, 'batting', year)
        roster_info['pitcher_stats'] = consolidate_player_stats(ty_uid, 'pitching', year)
        roster_info['fielder_stats'] = consolidate_player_stats(ty_uid, 'fielding', year)
        write_roster_info(ty_uid, roster_info)

        logger.log('\t\tTime = ' + time_converter(time.time() - team_began))
    db.close()

    elapsed = time_converter(time.time() - began)
    logger.log("Done consolidating team data: Time = " + elapsed + '\n\n')
    driver_logger.log("\t\tTime = " + elapsed)
コード例 #6
0
def populate_teams_table(year):
    """Insert the teams of one season into the ``teams`` table.

    The first line of ``../background/yearTeams.txt`` containing ``year`` is
    split on commas; the first and last fields are dropped and the part of
    each remaining field before ``;`` is used as the team id. The ``teamId``
    index is dropped before the inserts and re-added afterwards.

    Args:
        year: Season whose teams are inserted.
    """
    driver_logger.log('\tPopulating teams table')
    print("Populating teams table")
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Begin populating teams table for ' + str(year) + ' || Timestamp: ' + timestamp)

    teams_path = os.path.join("..", "background", "yearTeams.txt")
    with open(teams_path, 'rt') as year_file:
        db = DatabaseConnection(sandbox_mode)
        db.write('ALTER TABLE teams DROP INDEX teamId;')
        # Only the first matching line is used.
        year_line = next((line for line in year_file if str(year) in line), None)
        if year_line is not None:
            for team in year_line.split(',')[1:-1]:
                team_id = team.split(';')[0]
                # NOTE(review): "\'" is the same string as "'", so this
                # replace leaves the name unchanged.
                team_name = translate_team_name(team_id).replace("'", "\'")
                db.write('insert into teams (teamId, teamName) values ("' + team_id + '", "' + team_name + '");')
    db.write('ALTER TABLE teams ADD INDEX(teamId);')
    db.close()

    elapsed = time.time() - began
    logger.log('Populating teams table completed: ' + time_converter(elapsed))
    driver_logger.log('\t\tTime = ' + time_converter(elapsed))
コード例 #7
0
def team_certainties(year):
    """Compute and store a certainty value for every team of one season.

    For each stat type (batting, then pitching) and each team, the
    "uncertain" plate appearances ``pa - pa * certainty`` of its players are
    summed, and ``(team_pa - uncertain_pa) / team_pa`` is written to
    ``team_years.certainty``. Players with no matching stat row, or with a
    value that cannot be combined numerically, are skipped.

    Args:
        year: Season whose team certainties are calculated.
    """
    print('aggregating team statistic certainties')
    driver_logger.log("\tAggregating team statistic certainties")
    began = time.time()
    logger.log("Calculating team certainties || Timestamp: " + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    db = DatabaseConnection(sandbox_mode)
    # NOTE(review): both passes write the same column, so the pitching pass
    # overwrites the value stored by the batting pass. Confirm this is intended.
    for stat_type in ("batting", "pitching"):
        team_rows = db.read('select ty_uniqueidentifier, teamid from team_years where year = ' + str(year))
        for team_row in team_rows:
            ty_uid, team_id = team_row[0], team_row[1]
            uncertain_pa = 0
            players = list(db.read('select playerid from player_positions where ty_uniqueidentifier = '
                                   + str(ty_uid) + ';'))
            for player in players:
                pt_uid = db.read('select pt_uniqueidentifier from player_teams where playerid = "' + player[0]
                                 + '" and teamid = "' + team_id + '";')[0][0]
                try:
                    stat_rows = db.read('select pa, certainty from player_' + stat_type + ' where year = '
                                        + str(year) + ' and pt_uniqueidentifier = ' + str(pt_uid) + ';')
                    player_pa = int(stat_rows[0][0])
                    uncertain_pa += player_pa - (player_pa * float(stat_rows[0][1]))
                except (IndexError, TypeError):
                    continue
            team_pa = int(db.read('select pa from team_years where ty_uniqueidentifier = ' + str(ty_uid) + ';')[0][0])
            certainty = (team_pa - uncertain_pa) / team_pa
            db.write('update team_years set certainty = ' + str(certainty) + ' where ty_uniqueidentifier = '
                     + str(ty_uid) + ';')
    db.close()
    elapsed = time_converter(time.time() - began)
    logger.log("Done calculating team certainties: time = " + elapsed + '\n\n')
    driver_logger.log("\t\tTime = " + elapsed)
コード例 #8
0
def clean_up_deadlocked_file():
    """Replay the statements saved in ``utilities/deadlocked.txt``, then empty it.

    Every line of the file is passed to ``db.write``. The file is truncated
    only after all lines were written without an exception, so a failed
    replay leaves the statements in place.
    """
    driver_logger.log("\tCleaning up deadlocked records")
    deadlocked_path = os.path.join("utilities", "deadlocked.txt")
    with open(deadlocked_path, 'r') as f:
        db = DatabaseConnection(sandbox_mode)
        try:
            for line in f:
                db.write(line)
        finally:
            # Release the connection even when a replayed statement fails.
            db.close()
    # Opening in write mode truncates the file; nothing is written to it.
    with open(deadlocked_path, "w"):
        pass
コード例 #9
0
def auto_migrate():
    """Submit a migration of all ``baseballData`` and ``pitch_fx`` data.

    Builds the request mapping ``{database: {True: ["All"]}}`` for both
    databases and passes it to ``submit`` with two ``True`` flags.
    """
    # NOTE(review): `import_driver_logger` is not defined in the visible code;
    # confirm it is a real name and not a mangled `driver_logger`.
    import_driver_logger.log(
        "\tTransferring all sandbox data to production environment")
    # A fresh ["All"] list is built per database, as in the literal form.
    migration_request = {
        database: {True: ["All"]}
        for database in ('baseballData', 'pitch_fx')
    }
    submit(migration_request, True, True)
コード例 #10
0
def get_pitch_fx_data(year, month=None, day=None):
    """Download pitch fx data for a whole season or for a single date.

    Nothing is downloaded for seasons before 2008. When both ``month`` and
    ``day`` are ``None`` every day from the season's opening day through
    day 31 of month 11 is requested (months before March are never
    requested), followed by ``aggregate_pitch_fx(year)``. Otherwise only the
    given date is requested, followed by
    ``aggregate_pitch_fx(year, month, day)``.

    Args:
        year: Season to download.
        month: Month of the single date, or ``None`` for the whole season.
        day: Day of the single date, or ``None`` for the whole season.
    """
    if year < 2008:
        driver_logger.log("\tNo pitch fx data to download before 2008")
        return
    start_time = time.time()
    if month is None and day is None:
        driver_logger.log("\tFetching " + str(year) + " pitch fx data")
        print("Fetching " + str(year) + " pitch fx data")
        logger.log("Downloading pitch fx data for " + str(year) +
                   " || Timestamp: " +
                   datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
        db = DatabaseConnection(sandbox_mode)
        try:
            opening_day = db.read('select opening_day from years where year = ' +
                                  str(year) + ';')[0][0]
        finally:
            db.close()
        # Parse the opening day once instead of on every loop iteration.
        # Assumes the stored value looks like "<month>-<day>"; TODO confirm.
        opening_parts = opening_day.split('-')
        opening_month = int(opening_parts[0])
        opening_date = int(opening_parts[1])
        for month_number in range(max(3, opening_month), 12):
            # In the opening month, skip the days before opening day.
            first_day = max(1, opening_date) if month_number == opening_month else 1
            # Day 32 is exclusive; days that do not exist in a month are
            # still requested, exactly as before.
            for day_number in range(first_day, 32):
                get_day_data(str(day_number).zfill(2),
                             str(month_number).zfill(2), str(year))
        logger.log("Done fetching " + str(year) + " pitch fx data: time = " +
                   time_converter(time.time() - start_time) + '\n\n\n\n')
        driver_logger.log("\t\tTime = " +
                          time_converter(time.time() - start_time))
        aggregate_pitch_fx(year)
    else:
        driver_logger.log("\tFetching " + str(month) + "-" + str(day) + "-" +
                          str(year) + " pitch fx data")
        print("Fetching " + str(month) + "-" + str(day) + "-" + str(year) +
              " pitch fx data")
        logger.log("Downloading pitch fx data for " + str(month) + "-" +
                   str(day) + "-" + str(year) + " || Timestamp: " +
                   datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
        # NOTE(review): the whole-season branch zero-pads day and month, this
        # branch does not. Confirm get_day_data accepts both forms.
        get_day_data(str(day), str(month), str(year))
        driver_logger.log("\t\tTime = " +
                          time_converter(time.time() - start_time))
        aggregate_pitch_fx(year, month, day)
コード例 #11
0
def team_defensive_statistics(year):
    """Download team pitching and batting-against tables for one season.

    The standard pitching page is required. The batting-against page is
    best effort: any failure while fetching or parsing it yields an empty
    row list. Both row sets are passed to ``extract_data`` together with the
    mapping from page stat names to stored stat names.

    Args:
        year: Season whose team defensive data is downloaded.
    """
    def league_page(suffix):
        # Full page markup as one string.
        return str(
            BeautifulSoup(
                urlopen("https://www.baseball-reference.com/leagues/MLB/" + str(year) + suffix),
                "html.parser"))

    def team_rows(page, player_heading, team_heading):
        # Text before the player heading and after the team heading,
        # narrowed to the body of the first table, split into rows.
        team_section = page.split(player_heading)[0].split(team_heading)[1]
        return team_section.split('<tbody>')[1].split('</tbody>')[0].split('<tr>')

    pitching_columns = {
        'R': 'RA',
        'ER': 'ER',
        'H': "HA",
        'HR': 'HRA',
        'BB': 'BBA',
        'HBP': 'HBPA',
        'IBB': 'IBBA',
        'SO': 'K',
        'ERA': 'ERA',
        'whip': 'WHIP'
    }
    batting_against_columns = {
        'PA': 'PAA',
        'AB': 'ABA',
        '2B': '2BA',
        '3B': '3BA',
        'batting_avg': 'BAA',
        'onbase_perc': 'OBA',
        'slugging_perc': 'SLGA',
        'onbase_plus_slugging': 'OPSA',
        'batting_avg_bip': 'BABIPA'
    }

    driver_logger.log("\tGathering team defensive statistics")
    print('Gathering team defensive statistics')
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Downloading team defensive data for ' + str(year) + ' || Timestamp: ' + timestamp)

    pitching_page = league_page("-standard-pitching.shtml")
    try:
        against_page = league_page("-batting-pitching.shtml")
        batting_against_rows = team_rows(against_page, 'Player Batting Against', '<h2>Team Batting Against')
    except Exception:
        batting_against_rows = []
    standard_pitching_rows = team_rows(pitching_page, 'Player Standard Pitching', '<h2>Team Standard Pitching')

    extract_data(standard_pitching_rows, pitching_columns, year)
    extract_data(batting_against_rows, batting_against_columns, year)

    elapsed = time_converter(time.time() - began)
    logger.log("Done donwloading team defensive data for " + str(year) + ': time = ' + elapsed + '\n\n')
    driver_logger.log('\t\tTime = ' + elapsed)
コード例 #12
0
def determine_pitcher_roles_year(year):
    """Replace the generic ``P`` position with starter / reliever roles.

    For every pitching record of ``year`` (combined ``TOT`` team rows
    excluded) whose stored positions contain ``P``, the share of appearances
    that were starts decides the role list:

    * more than 75%  -> ``SP``
    * more than 50%  -> ``SP, RP``
    * more than 25%  -> ``RP, SP``
    * otherwise      -> ``RP``

    Each ``P`` entry in the player's position list is replaced by that role
    list and the row in ``player_positions`` is updated. Players without a
    ``player_positions`` row, or with zero recorded appearances, are skipped.

    Args:
        year: Season whose pitcher roles are determined.
    """
    driver_logger.log("\tDetermining Pitcher Roles")
    print("Determining Pitcher Roles")
    start_time = time.time()
    logger.log("Determining Pitcher Roles || Timestamp: " + datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    db = DatabaseConnection(sandbox_mode)
    try:
        for pt_uid in db.read('select pt_uniqueidentifier from player_pitching where year = ' + str(year) + ';'):
            player_id_team_id = db.read('select playerid, teamid from player_teams where pt_uniqueidentifier='
                                        + str(pt_uid[0]) + ';')[0]
            player_id = player_id_team_id[0]
            team_id = player_id_team_id[1]
            if team_id == 'TOT':
                continue
            ty_uid = str(db.read('select ty_uniqueidentifier from team_years where teamId = "' + team_id
                                 + '" and year = ' + str(year) + ';')[0][0])
            try:
                positions = db.read('select positions from player_positions where playerId = "' + player_id
                                    + '" and ty_uniqueidentifier = ' + ty_uid + ';')[0][0]
            except IndexError:
                continue
            if 'P' not in positions:
                continue
            appearances_starts = db.read('select G, GS from player_pitching where pt_uniqueidentifier = '
                                         + str(pt_uid[0]) + ' and year = ' + str(year) + ';')[0]
            appearances = appearances_starts[0]
            starts = appearances_starts[1]
            if not appearances:
                # No appearances recorded: the start share is undefined.
                continue
            start_percent = starts / appearances
            if start_percent > 0.75:
                role = ['SP']
            elif start_percent > 0.50:
                role = ['SP', 'RP']
            elif start_percent > 0.25:
                role = ['RP', 'SP']
            else:
                role = ['RP']
            update_positions = []
            for position in positions.split(','):
                if position == 'P':
                    update_positions += role
                else:
                    update_positions.append(position)
            # The previous code passed the *result* of db.write(...) to
            # executor.submit, so the write already ran here synchronously
            # and the pool only received a non-callable. Write directly.
            db.write('update player_positions set positions = "' + ','.join(update_positions)
                     + '" where ty_uniqueidentifier = ' + ty_uid + ' and playerId = "' + player_id
                     + '";')
    finally:
        db.close()
    total_time = time_converter(time.time() - start_time)
    logger.log("Done: Time = " + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #13
0
ファイル: email_results.py プロジェクト: Engy-22/BaseballSync
def send_results():
    """Email the daily download results.

    The message body is ``get_csv_results()`` followed by
    ``get_driver_results()``, sent from ``config.MAIL_USERNAME`` to
    ``config.MAIL_RECIPIENT`` over a STARTTLS session with
    ``config.MAIL_SERVER`` / ``config.MAIL_PORT``.
    """
    print('Emailing results')
    driver_logger.log('\tEmailing results\n\n\n\n')
    sender = config.MAIL_USERNAME
    recipient = config.MAIL_RECIPIENT
    pwd = config.MAIL_PASSWORD
    header = "To: " + recipient + '\nFrom: ' + sender + '\nSubject: Daily download results'
    # The context manager issues QUIT on exit, including when login or
    # sendmail raises, so the session is never left open.
    with smtplib.SMTP(config.MAIL_SERVER, config.MAIL_PORT) as s:
        s.ehlo()
        s.starttls()
        s.ehlo()
        s.login(sender, pwd)
        s.sendmail(
            sender, recipient,
            header + '\n\n' + get_csv_results() + '\n' + get_driver_results())
コード例 #14
0
def team_offensive_statistics(year):
    """Download the team standard batting table for one season.

    The rows of the "Team Standard Batting" table are passed to
    ``extract_data`` together with the mapping from page stat names to
    stored stat names.

    Args:
        year: Season whose team offensive data is downloaded.
    """
    driver_logger.log("\tGathering team offensive statistics")
    print('Gathering team offensive statistics')
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Downloading team offensive data for ' + str(year) + ' || Timestamp: ' + timestamp)

    url = "https://www.baseball-reference.com/leagues/MLB/" + str(year) + "-standard-batting.shtml"
    page = str(BeautifulSoup(urlopen(url), "html.parser"))

    # Most stats keep their page name; only these are stored under another one.
    renamed = {
        'GIDP': 'GDP',
        'batting_avg': 'BA',
        'onbase_perc': 'OBP',
        'slugging_perc': 'SLG',
        'onbase_plus_slugging': 'OPS',
    }
    page_stat_names = ('PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'GIDP', 'HBP',
                       'SH', 'SF', 'IBB', 'G', 'batting_avg', 'onbase_perc', 'slugging_perc',
                       'onbase_plus_slugging')
    column_map = {name: renamed.get(name, name) for name in page_stat_names}

    # Text before the player table and after the team heading, narrowed to
    # the body of the first table.
    team_section = page.split('Player Standard Batting')[0].split('<h2>Team Standard Batting')[1]
    table_body = team_section.split('<tbody>')[1].split('</tbody>')[0]
    standard_batting_rows = table_body.split('<tr>')
    extract_data(standard_batting_rows, column_map, year)

    elapsed = time_converter(time.time() - began)
    logger.log("Done donwloading team offensive data for " + str(year) + ': time = ' + elapsed + '\n\n')
    driver_logger.log('\t\tTime = ' + elapsed)
コード例 #15
0
def manager_table_constructor():
    """Scrape the all-time managers page and queue one record per manager.

    For every table row that contains a manager name cell, the manager id,
    last name, first name, wins and losses are extracted and submitted to
    ``write_to_file`` as one comma-separated string. Rows that lack one of
    the expected markers are skipped. The ``managerId`` index is dropped
    before and re-added after the writes.
    """
    driver_logger.log('\tGathering manager data (all-time)')
    print("Gathering manager data (all-time)")
    start_time = time.time()
    # NOTE(review): this message says "teams table" although the function
    # handles managers. Left unchanged in case logs are parsed elsewhere.
    logger.log('Begin populating teams table || Timestamp: ' +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    table = str(
        bs(
            urllib.request.urlopen(
                'https://www.baseball-reference.com/managers/'),
            'html.parser'))
    rows = table.split('<tr')
    db = DatabaseConnection(sandbox_mode=True)
    db.write('ALTER TABLE managers DROP INDEX managerId;')
    db.close()
    with ThreadPoolExecutor(os.cpu_count()) as executor:
        for row in rows:
            if '<td class="left" csk="' not in row:
                continue
            this_row = row.split('</tr>')[0]
            try:
                # The former .replace("'", "\'") calls were no-ops ("\'" is
                # the same string as "'") and have been dropped.
                manager_id = this_row.split('<a href="/managers/')[1].split('.shtml')[0]
                last_first = this_row.split('<td class="left" csk="')[1].split('"')[0]
                name_parts = last_first.split(',')
                last = name_parts[0]
                first = name_parts[1]
                wins = this_row.split('data-stat="W">')[1].split('<')[0]
                loses = this_row.split('data-stat="L">')[1].split('<')[0]
            except (AttributeError, IndexError):
                # A missing marker makes split(...)[1] raise IndexError;
                # skip such rows instead of aborting the whole import.
                continue
            executor.submit(
                write_to_file, '"' + manager_id + '","' + last +
                '","' + first + '",' + wins + ',' + loses)
    db = DatabaseConnection(sandbox_mode=True)
    db.write('ALTER TABLE managers ADD INDEX(managerId);')
    db.close()
    total_time = time.time() - start_time
    logger.log('Constructing manager table completed: time = ' +
               time_converter(total_time))
    driver_logger.log('\t\tTime = ' + time_converter(total_time))
コード例 #16
0
def primary_and_secondary_positions(year):
    """Determine and store primary / secondary positions for one season.

    Position rows are collected for the teams of ``year`` and for the teams
    of the 26 seasons ``year - 25`` through ``year``. For every player row
    of ``year``, ``get_player_positions`` is evaluated against the wider
    set, ``determine_primary_position`` is applied to its result, and the
    outcome is passed to ``write_to_file``.

    Args:
        year: Season whose player positions are determined.
    """
    def positions_for(team_rows):
        # One query per team; results are concatenated in team order.
        collected = []
        for team_row in team_rows:
            collected.extend(db.read('select playerId, positions from player_positions where '
                                     'TY_uniqueidentifier = ' + str(team_row[0]) + ';'))
        return collected

    print("adding primary and secondary positions")
    driver_logger.log("\tAdding primary and secondary positions")
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) + " primary and secondary data || Timestamp: " + timestamp)

    db = DatabaseConnection(sandbox_mode)
    logger.log("\tAssembling list of players")
    assembly_began = time.time()
    teams_from_year = db.read("select TY_uniqueidentifier from team_years where year=" + str(year) + ';')
    teams_from_year_range = db.read("select TY_uniqueidentifier from team_years where year between "
                                    + str(year - 25) + ' and ' + str(year) + ';')
    player_positions = positions_for(teams_from_year)
    player_positions_range = positions_for(teams_from_year_range)
    logger.log("\t\tTime = " + time_converter(time.time() - assembly_began))

    logger.log("\tDetermining positions")
    determination_began = time.time()
    for player in player_positions:
        position_string = get_player_positions(player, player_positions_range)
        positions_dict = determine_primary_position(position_string)
        # NOTE(review): "\'" is the same string as "'", so this replace
        # leaves the id unchanged.
        write_to_file(player[0].replace("'", "\'"), positions_dict)
    db.close()
    logger.log("\t\tTime = " + time_converter(time.time() - determination_began))

    elapsed = time_converter(time.time() - began)
    logger.log("Done downloading primary and secondary positions: time = " + elapsed + '\n\n')
    driver_logger.log("\t\tTime = " + elapsed)
コード例 #17
0
def award_winner_driver(year):
    """Gather the award results of one season and store them.

    Runs the MVP / Cy Young, ROY, MOY, gold glove, silver slugger and triple
    crown gatherers in that order and passes their nine results to
    ``write_to_file``. From 1933 on, ``all_star_finder`` is also run; its
    boolean flag is ``False`` for 1945 and 1959-1962 and ``True`` otherwise.

    Args:
        year: Season whose award winners are gathered.
    """
    print("gathering award winner data")
    driver_logger.log("\tGathering award winner data")
    began = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Beginning award winner driver || Timestamp: " + timestamp)

    mvp_cy_result = mvp_cy_young(year, logger)
    roy_result = roy_gatherer(year, logger)
    moy_result = moy_gatherer(year, logger)
    glove_first, glove_second = gold_glove_winners(year, logger)
    slugger_first, slugger_second = silver_slugger_winners(year, logger)
    crown_first, crown_second = triple_crown_winners(year, logger)
    write_to_file(year, [
        mvp_cy_result, roy_result, moy_result,
        glove_first, glove_second,
        slugger_first, slugger_second,
        crown_first, crown_second,
    ])

    if year >= 1933:
        flag = year not in [1945, 1959, 1960, 1961, 1962]
        all_star_finder(year, flag, logger)

    elapsed = time_converter(time.time() - began)
    logger.log("Award winner driver complete: time = " + elapsed + '\n\n')
    driver_logger.log("\t\tTime = " + elapsed)
コード例 #18
0
ファイル: driver_daily.py プロジェクト: Engy-22/BaseballSync
def main(from_server, day, month, year, frame=None):
    """Run the daily driver for one date and email the results.

    On a valid date the league and manager tables are built, the daily
    ``driver`` runs, the strike zone is created, deadlocked statements are
    replayed and the data is migrated. If any step raises, the error is
    logged, the results are emailed and the exception is re-raised.
    Otherwise the results are emailed and ``exit()`` is called.

    Args:
        from_server: When falsy and ``frame`` is given, ``frame.withdraw()``
            is called before work starts.
        day: Day of the month, 1-31 (not checked against the month length).
        month: Month, 1-12.
        year: Season, at least 1876.
        frame: Optional object exposing ``withdraw()`` (presumably a GUI
            window; confirm against the caller).
    """
    print('\n')
    if 0 < day <= 31 and 0 < month <= 12 and year >= 1876:
        try:
            driver_logger.log('Begin Daily Driver || Timestamp: ' + datetime.datetime.today().
                              strftime('%Y-%m-%d %H:%M:%S'))
            start_time = time.time()
            # `frame` defaults to None, so only hide it when one was supplied.
            if not from_server and frame is not None:
                frame.withdraw()
            league_table_constructor()
            manager_table_constructor()
            driver(day, month, year)
            create_strike_zone()
            clean_up_deadlocked_file()
            auto_migrate()
            driver_logger.log('Driver complete for year ' + str(year) + ': time = '
                              + time_converter(time.time()-start_time) + '\n')
        except Exception as e:
            driver_logger.log("ERROR:\t" + str(e))
            send_results()
            # Bare raise keeps the original traceback intact.
            raise
    else:
        print('Must enter a valid date.')
    send_results()
    exit()
コード例 #19
0
def pitcher_spray_chart_constructor(year):
    """Build pitcher spray charts for one season (1988 and later only).

    Resets the module-level ``bad_gateway_data`` list, then submits one
    ``reduce_functionality`` task for every distinct pitching record of
    ``year`` whose ``pa_infield`` is still NULL. If ``bad_gateway_data`` is
    non-empty afterwards it is handed to ``revisit_bad_gateways``.

    Args:
        year: Season whose pitcher spray charts are created.
    """
    global bad_gateway_data
    print("creating pitcher spray charts")
    began = time.time()
    bad_gateway_data = []
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) + " pitcher spray charts || Timestamp: " + timestamp)

    if year < 1988:
        driver_logger.log("\tNo pitcher spray chart data before 1988")
        logger.log("\tNo spray pitcher chart data before 1988")
        return

    driver_logger.log("\tCreating pitcher spray charts")
    db = DatabaseConnection(sandbox_mode)
    query = ('select PT_uniqueidentifier from player_pitching where year = ' + str(year)
             + ' and pa_infield is NULL;')
    pending_records = set(db.read(query))
    db.close()
    # Leaving the with-block waits for every submitted task to finish.
    with ThreadPoolExecutor(os.cpu_count()) as pool:
        for record in pending_records:
            pool.submit(reduce_functionality, year, record)
    driver_logger.log("\t\tTime = " + time_converter(time.time() - began))

    if len(bad_gateway_data) > 0:
        revisit_bad_gateways(year, bad_gateway_data)
    logger.log("Done downloading pitcher spray charts: time = " + time_converter(time.time() - began) + '\n\n')
コード例 #20
0
def league_table_constructor():
    """Insert the seven known leagues into the ``leagues`` table.

    The ``leagueId`` index is dropped before the inserts and re-added
    afterwards.
    """
    driver_logger.log('\tPopulating leagues table (all-time)')
    print('Populating leagues table (all-time)')
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Begin populating leagues table || Timestamp: ' + timestamp)

    # (league id, league name), inserted in this order.
    league_rows = (
        ('NL', 'National League'),
        ('AL', 'American League'),
        ('AA', 'American Association'),
        ('FL', 'Federal League'),
        ('PL', 'Players League'),
        ('UA', 'Union Association'),
        ('NA', 'National Association'),
    )
    connection = DatabaseConnection(sandbox_mode)
    connection.write('ALTER TABLE leagues DROP INDEX leagueId;')
    for league_id, league_name in league_rows:
        statement = ('insert into leagues (leagueId, leagueName) values ("' + league_id + '", "'
                     + league_name + '");')
        connection.write(statement)
    connection.write('ALTER TABLE leagues ADD INDEX(leagueId);')
    connection.close()

    logger.log('Populating leagues table completed\n\n')
    driver_logger.log('\t\tPopulating leagues table completed')
コード例 #21
0
def team_fielding_file_constructor(year):
    """Download team fielding pages for one season and write the positions.

    Resets the module-level ``pages`` mapping, reads the first line of
    ``yearTeams.txt`` containing ``year`` and submits one ``load_url`` task
    per team entry (entries containing ``TOT`` are skipped). Afterwards
    ``write_to_file(year)`` is called.

    Args:
        year: Season whose team fielding positions are downloaded.

    Raises:
        FileNotFoundError: If ``yearTeams.txt`` is found in neither
            ``../background`` nor ``../../../background``.
    """
    global pages
    print('getting team fielding positions')
    driver_logger.log("\tGetting team fielding positions")
    start_time = time.time()
    pages = {}
    logger.log("Downloading " + str(year) +
               " team fielding positions || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tDownloading team pages")
    try:
        year_file = open(os.path.join("..", "background", "yearTeams.txt"),
                         'r')
    except FileNotFoundError:
        year_file = open(
            os.path.join("..", "..", "..", "background", "yearTeams.txt"), 'r')
    # `with` closes the file on every path; previously it stayed open when no
    # line matched the year or when a line failed to parse.
    with year_file:
        with ThreadPoolExecutor(os.cpu_count()) as executor:
            for line in year_file:
                if str(year) in line:
                    for team in line.split(',')[1:-1]:
                        split_team = team.split(';')
                        if "TOT" not in split_team:
                            executor.submit(load_url, year, split_team[0],
                                            split_team[1])
                    # Only the first matching line is used.
                    break
    logger.log("\t\tTime = " + time_converter(time.time() - start_time))
    logger.log("\tOrganizing team position data")
    write_time = time.time()
    write_to_file(year)
    logger.log("\t\tTime = " + time_converter(time.time() - write_time))
    total_time = time_converter(time.time() - start_time)
    logger.log("Done downloading team fielding data: time = " + total_time +
               '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #22
0
def ballpark_and_manager_data(year):
    """Download team pages and gather ballpark / manager data for one season.

    Resets the module-level ``pages`` mapping and builds a mapping from the
    second ``;``-separated field of each team entry to the first, using the
    first line of ``../background/yearTeams.txt`` that contains ``year``
    (entries with a ``TOT`` field are skipped). One ``load_url`` task per
    key is run on a thread pool, then one ``gather_team_home_numbers`` task
    per team on a second pool.

    Args:
        year: Season whose ballpark and manager data is gathered.
    """
    global pages
    driver_logger.log('\tGathering ballpark and manager data')
    print("Gathering ballpark and manager data")
    began = time.time()
    pages = {}
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Beginning ballpark and manager data download for ' + str(year) + ' || Timestamp: ' + timestamp)

    teams = {}
    with open(os.path.join("..", "background", "yearTeams.txt"), 'rt') as year_file:
        # Only the first matching line is used.
        year_line = next((line for line in year_file if str(year) in line), None)
        if year_line is not None:
            for entry in year_line.split(',')[1:-1]:
                fields = entry.split(';')
                if 'TOT' in fields:
                    continue
                teams[fields[1]] = fields[0]

    logger.log('Begin downloading team pages')
    download_began = time.time()
    with ThreadPoolExecutor(os.cpu_count()) as download_pool:
        for team_key in teams:
            download_pool.submit(load_url, year, team_key)
    logger.log('\tDone downloading team pages: time = ' + time_converter(time.time() - download_began))

    logger.log("Calculating and writing ballpark numbers and downloading images")
    calc_began = time.time()
    team_count = len(teams)
    with ThreadPoolExecutor(os.cpu_count()) as calc_pool:
        for team_key, team_id in teams.items():
            calc_pool.submit(gather_team_home_numbers, team_id, team_key, year, team_count)
    logger.log("\tDone calculating and writing ballpark numbers and downloading manager data: time = "
               + time_converter(time.time() - calc_began))

    elapsed = time_converter(time.time() - began)
    logger.log('Ballpark and manager data download completed: time = ' + elapsed + '\n\n')
    driver_logger.log('\t\tTime = ' + elapsed)
コード例 #23
0
def team_batting_order_constructor(year):
    """Download the team pages for ``year`` and organize the batting orders.

    Nothing is downloaded for seasons before 1908. Otherwise the module-level
    ``pages`` dict is reset, the first line of ``yearTeams.txt`` that contains
    ``year`` is read, and ``load_url`` is submitted to a thread pool for every
    entry on it that does not contain ``TOT``. ``get_hitters(year)`` runs once
    all downloads have finished.

    :param year: season to process
    :raises FileNotFoundError: if ``yearTeams.txt`` is in neither of the two
        expected ``background`` directories
    """
    if year < 1908:
        logger.log("\tNo team batting order data to download before 1908.")
        driver_logger.log(
            "\tNo team batting order data to download before 1908.")
        return
    print("getting team batting order data")
    driver_logger.log("\tGetting team batting order data")
    start_time = time.time()
    global pages
    pages = {}
    logger.log("Downloading " + str(year) +
               " team batting order data || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tDownloading team pages")
    # The relative location of the background directory depends on the
    # working directory the driver was started from.
    try:
        year_file = open(os.path.join("..", "background", "yearTeams.txt"),
                         'r')
    except FileNotFoundError:
        year_file = open(
            os.path.join("..", "..", "..", "background", "yearTeams.txt"), 'r')
    # Managing the file with `with` guarantees it is closed; the executor is
    # exited first, so every download has finished before the file is closed.
    with year_file, ThreadPoolExecutor(os.cpu_count()) as executor:
        for line in year_file:
            if str(year) in line:
                for team in line.split(',')[1:-1]:
                    if "TOT" not in team:
                        fields = team.split(';')
                        executor.submit(load_url, year, fields[0], fields[1])
                break
    logger.log("\t\t\tTime = " + time_converter(time.time() - start_time))
    logger.log("\tOrganizing batting orders")
    write_time = time.time()
    get_hitters(year)
    logger.log("\t\t\tTime = " + time_converter(time.time() - write_time))
    total_time = time_converter(time.time() - start_time)
    logger.log("Done downloading team batting order data: time = " +
               total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #24
0
def pitcher_tendencies(year):
    """Store pitcher tendency data for ``year``.

    For 1988 and later the league "pitches-pitching" page is fetched from
    baseball-reference.com, the "Player Pitching Pitches" table is cut into
    ``<tr`` fragments, and each fragment is passed to ``intermediate``.
    Results are keyed by player id (a later row for the same id replaces the
    earlier one) and written with ``write_to_file``, after which
    ``fill_pitchers_with_0_pa`` is called. For earlier seasons only
    ``fill_fields`` is called.

    :param year: season to process
    """
    print("storing pitcher tendencies")
    start_time = time.time()
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log("Downloading " + str(year) +
               " pitcher tendencies || Timestamp: " + timestamp)
    if year < 1988:
        driver_logger.log("\tNo pitcher tendency data before 1988")
        logger.log("\tNo pitcher tendency data before 1988")
        fill_fields(year)
    else:
        driver_logger.log("\tStoring pitcher tendencies")
        logger.log("\tDownloading data")
        url = ('https://www.baseball-reference.com/leagues/MLB/' +
               str(year) + '-pitches-pitching.shtml')
        page = str(BeautifulSoup(urlopen(url), 'html.parser'))
        section = page.split('<h2>Player Pitching Pitches</h2>')[1]
        table_body = section.split('<tbody>')[1].split('</tbody>')[0]
        row_fragments = table_body.split('<tr')
        logger.log("\t\tTime = " + time_converter(time.time() - start_time))

        logger.log("\tFormatting data")
        format_time = time.time()
        # The id from the previous accepted row is handed back to
        # `intermediate` on every call.
        last_player_id = ""
        per_player = {}
        for fragment in row_fragments:
            player_id, parsed = intermediate(fragment, last_player_id)
            if player_id is None:
                continue
            per_player[player_id] = parsed
            last_player_id = player_id
        for player_id, player_stats in per_player.items():
            write_to_file(year, player_id, player_stats)
        fill_pitchers_with_0_pa(year)
        # This figure covers formatting and writing only, not the download.
        elapsed = time_converter(time.time() - format_time)
        logger.log("\t\tTime = " + elapsed)
        driver_logger.log("\t\tTime = " + elapsed)
    logger.log("Done storing pitcher tendencies: time = " +
               time_converter(time.time() - start_time) + '\n\n')
コード例 #25
0
def _division_table(page, heading, occurrence):
    """Return the table body that follows the ``occurrence``-th ``<h2>heading</h2>``.

    :param page: full standings page markup as a string
    :param heading: text inside the ``<h2>`` tag that introduces the table
    :param occurrence: 1-based index of the heading on the page
    :return: markup between ``<tbody>`` and ``</tbody></table>``, or ``None``
        if the heading or its table is not present
    """
    try:
        return page.split('<h2>' + heading + '</h2>')[occurrence].split(
            '<tbody>')[1].split('</tbody></table>')[0]
    except IndexError:
        return None


def league_standings(year):
    """Scrape the standings page for ``year`` and store standings and playoff data.

    The baseball-reference.com standings page is downloaded and split by
    string markers. One row per team is written through ``write_to_db``; for
    seasons after 1968 the league/division comes from ``get_league_division``,
    otherwise from ``get_league_only``. For 1903 and every season after 1904
    the postseason section is parsed into a ``playoff_picture`` dict
    (``<series>_champ<n>`` / ``<series>_runnerup<n>`` keys, ``None`` when a
    series is absent) and handed to ``write_playoff_data``; for the other
    seasons the league champions found in the main table are handed to
    ``write_league_champs_non_ws``.

    :param year: season to process
    """
    driver_logger.log("\tAdding to team_years (standings)")
    print("Adding to team_years (standings)")
    start_time = time.time()
    logger.log('Begin organizing league standings for ' + str(year) +
               ' || Timestamp: ' +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    page = str(
        BeautifulSoup(
            urlopen("https://www.baseball-reference.com/leagues/MLB/" +
                    str(year) + "-standings.shtml"), "html.parser"))
    try:
        playoffs = page.split('<h2>Postseason</h2>')[1].split(
            '</tbody></table>')[0]
    except IndexError:
        logger.log("\tNo playoffs in " + str(year))
        playoffs = ""

    # (division key, heading text, which occurrence of the heading to use).
    # The first occurrence of a heading is stored under the AL key, the
    # second under the NL key. 1981 uses the "-- Overall" headings.
    if year != 1981:
        division_layout = (
            ('al_east', 'East Division', 1),
            ('nl_east', 'East Division', 2),
            ('al_central', 'Central Division', 1),
            ('nl_central', 'Central Division', 2),
            ('al_west', 'West Division', 1),
            ('nl_west', 'West Division', 2),
        )
    else:
        division_layout = (
            ('al_east', 'East Division -- Overall', 1),
            ('al_west', 'West Division -- Overall', 1),
            ('nl_east', 'East Division -- Overall', 2),
            ('nl_west', 'West Division -- Overall', 2),
        )
    divisions = {}
    for division, heading, occurrence in division_layout:
        table = _division_table(page, heading, occurrence)
        if table is not None:  # divisions missing from the page are left out
            divisions[division] = table

    main_table = page.split('<div class="overthrow table_container" id="div_expanded_standings_overall">')[1].\
        split('<tbody>')[1].split('<tr class="league_average_table')[0].split('<tr')
    champs = {}
    for row in main_table:
        if year == 1904 or year < 1903:
            # Seasons without a World Series: rows whose league id is bold
            # are recorded as that league's champion.
            if 'data-stat="lg_ID" ><strong>' in row:
                champs[row.split('data-stat="lg_ID" ><strong>')[1].split('<')[0]] = \
                    translate_team_id(row.split('href="/teams/')[1].split('/')[0], year)
        try:
            team_key = row.split('/teams/')[1].split('/')[0]
            team_id = translate_team_id(team_key, year)
            if year > 1968:
                this_string = "'" + team_id + "'," + str(
                    year) + "," + get_league_division(divisions, team_key,
                                                      year)
            else:
                this_string = "'" + team_id + "'," + str(
                    year) + "," + get_league_only(row)
            this_string += ',' + wins_loses(row)
            this_string += ',' + is_in_playoffs(playoffs, team_key, year)
        except IndexError:
            # Fragments without a team link are not team rows.
            continue
        write_to_db(this_string, team_id, year)

    if year == 1903 or year > 1904:  # the first world series (1903); didn't play a WS in 1904
        # Number of series of each kind that are looked for.
        series = {
            'World Series': 1,
            'ALCS': 1,
            'NLCS': 1,
            'AL Division Series': 2,
            'NL Division Series': 2
        }
        # The NL Division Series was previously abbreviated 'alds' as well,
        # which made its results overwrite the AL Division Series keys.
        abbreviation = {
            'World Series': 'ws',
            'ALCS': 'alcs',
            'NLCS': 'nlcs',
            'AL Division Series': 'alds',
            'NL Division Series': 'nlds'
        }
        playoff_picture = {}
        for matchup, times in series.items():
            # sections[n] is the markup that follows the n-th occurrence of
            # the series name.
            sections = playoffs.split('>' + matchup + '<')
            for instance in range(times):
                # In each section the first team link is stored as the
                # champion and the second as the runner-up.
                for role, link_index in (('_champ', 1), ('_runnerup', 2)):
                    key = abbreviation[matchup] + role + str(instance + 1)
                    try:
                        # Index by instance so the second series of a round is
                        # read from its own section rather than repeating the
                        # first one.
                        playoff_picture[key] = translate_team_id(
                            sections[instance + 1].split('a href="/teams/')
                            [link_index].split('/')[0], year)
                    except IndexError:
                        playoff_picture[key] = None
        write_playoff_data(year, playoff_picture)
    else:
        write_league_champs_non_ws(champs, year)
    total_time = time_converter(time.time() - start_time)
    logger.log('Done organizing league standings for ' + str(year) +
               ': time = ' + total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #26
0
def create_strike_zone():
    """Derive strike-zone boundaries from called strikes and save them as JSON.

    The ``x`` and ``y`` values of every pitch recorded as a taken strike are
    read from ``pitcher_pitches``. For each axis the function walks outward
    from the median one unit at a time, in both directions, until three
    consecutive positions each hold fewer than ``threshold`` data points; the
    position just before that sparse run is the low/high strike edge. From the
    two edges it derives ``_middle``, two meridians that split the zone in
    thirds, and ``_low_ball`` / ``_high_ball`` points one third of the zone
    outside each edge. The resulting dict is written to
    ``background/strike_zone.json``.
    """
    from collections import Counter  # only needed here

    start_time = time.time()
    driver_logger.log('\tCreating Strike Zone')
    db = PitchFXDatabaseConnection(sandbox_mode)
    try:
        x_strikes = [
            row[0] for row in db.read(
                'select x from pitcher_pitches where x is not NULL and ball_strike = "strike" '
                'and swing_take = "take";')
        ]
        y_strikes = [
            row[0] for row in db.read(
                'select y from pitcher_pitches where y is not NULL and ball_strike = "strike" '
                'and swing_take = "take";')
        ]
    finally:
        # Close the connection even if a query fails.
        db.close()

    points = {}
    threshold = 1000
    # Iterate explicit (axis, data) pairs. Keying a dict by the medians (as
    # was done before) silently dropped an axis whenever both medians were
    # equal.
    for axis, coordinates in (('x', x_strikes), ('y', y_strikes)):
        median = stat.median(coordinates)
        # Pre-count occurrences once instead of scanning the list per step.
        occurrences = Counter(coordinates)
        for direction, incrementer in (('positive', 1), ('negative', -1)):
            sparse_intervals = 0
            place_on_number_line = median + incrementer
            while sparse_intervals < 3:  # a sparse interval holds fewer than `threshold` data points
                if occurrences[place_on_number_line] < threshold:
                    sparse_intervals += 1
                else:
                    sparse_intervals = 0  # the run of sparse intervals was interrupted
                place_on_number_line += incrementer  # move farther from the median
            edge = '_high_strike' if direction == 'positive' else '_low_strike'
            # Step back over the three sparse positions that ended the scan.
            points[axis + edge] = place_on_number_line - (incrementer * 3)

        low_strike = points[axis + '_low_strike']
        high_strike = points[axis + '_high_strike']
        points[axis + '_middle'] = (low_strike + high_strike) / 2
        for meridian, multiplier in (('_meridian_1', 1), ('_meridian_2', 2)):
            points[axis + meridian] = low_strike + ((high_strike - low_strike) / 3) * multiplier
        for extreme, meridian in (('_low_ball', 1), ('_high_ball', 2)):
            # extreme[:-4] turns '_low_ball' into '_low_', giving the matching strike edge.
            strike_edge = points[axis + extreme[:-4] + 'strike']
            offset = abs(strike_edge - points[axis + '_meridian_' + str(meridian)])
            points[axis + extreme] = strike_edge + offset * (-1 if 'low' in extreme else 1)

    # The relative location of the background directory depends on the
    # working directory the driver was started from.
    try:
        strike_zone_file = open(os.path.join("..", "background", "strike_zone.json"), "w")
    except FileNotFoundError:
        strike_zone_file = open(
            os.path.join("..", "..", "..", "background", "strike_zone.json"), "w")
    with strike_zone_file:
        json.dump(points, strike_zone_file, sort_keys=True, indent=4)
    driver_logger.log('\t\tTime = ' + time_converter(time.time() - start_time))
コード例 #27
0
def comparisons_driver(most_recent_year):
    """Run the hitter, pitcher, team-offense and team-defense comparisons.

    Nothing is done when ``most_recent_year`` is before 1998 or
    ``data_continuity`` reports a gap. Each of the four phases first builds a
    pool of possible comparisons from recent seasons (counting down from
    ``most_recent_year``), then walks every season from 1876 through
    ``most_recent_year`` and calls the matching ``make_*_comparisons``
    function. Seasons whose totals lookup raises ``IndexError`` are skipped.

    :param most_recent_year: latest season to include
    """
    if most_recent_year < 1998 or not data_continuity(most_recent_year):
        refusal = ("Cannot make comparisons if there is not data continuity back to 1876 or the current year "
                   "is prior to  1998")
        driver_logger.log("\t" + refusal)
        logger.log(refusal)
        return
    driver_logger.log("\tBeginning comparisons driver")
    start_time = time.time()
    logger.log("Beginning comparisons driver || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    all_years = range(1876, most_recent_year + 1)

    # ---- hitters -------------------------------------------------------
    print('making hitter comparisons (overall)')
    driver_logger.log('\t\tMaking hitter comparisons (overall)')
    hc_time = time.time()
    logger.log("\tMaking hitter comparisons (overall)")
    logger.log('\t\tGathering list of possible comps')
    possible_hitter_comps = {}
    for pool_year in range(most_recent_year, 1997, -1):
        hitters_this_year = {}
        possible_hitter_comps[pool_year] = hitters_this_year
        year_pa, year_totals = hitter_year_totals(pool_year, logger)
        for hitter in gather_players(pool_year, "batting", True, logger):
            hitter_pa, hitter_stats = get_hitter_stats(hitter, pool_year)
            if hitter_pa < 300:
                continue
            hitters_this_year[hitter + ';' + str(pool_year)] = hitter_dr_calc(
                hitter_pa, hitter_stats, year_pa, year_totals)
    logger.log('\t\tBegin calculating comparisons scores')
    for season in all_years:
        try:
            year_pa, year_totals = hitter_year_totals(season, logger)
        except IndexError:
            continue
        season_hitters = gather_players(season, "batting", False, logger)
        make_hitter_comparisons(season_hitters, season, possible_hitter_comps,
                                year_pa, year_totals, logger)
    phase_time = time_converter(time.time() - hc_time)
    logger.log("\t\tTime = " + phase_time)
    driver_logger.log('\t\t\tTime = ' + phase_time)

    # ---- pitchers ------------------------------------------------------
    print('making pitcher comparisons (overall)')
    driver_logger.log('\t\tMaking pitcher comparisons (overall)')
    pc_time = time.time()
    logger.log("\tMaking pitcher comparisons (overall)")
    logger.log('\t\tGathering list of possible comps')
    possible_pitcher_comps = {}
    # NOTE(review): this pool reaches back to 1997 while the other three stop
    # at 1998 - confirm whether that difference is intended.
    for pool_year in range(most_recent_year, 1996, -1):
        pitchers_this_year = {}
        possible_pitcher_comps[pool_year] = pitchers_this_year
        year_pa, year_totals = pitcher_year_totals(pool_year, logger)
        for pitcher in gather_players(pool_year, "pitching", True, logger):
            pitcher_pa, pitcher_stats = get_pitcher_stats(pitcher, pool_year)
            if pitcher_pa < 200:
                continue
            pitchers_this_year[pitcher + ';' + str(pool_year)] = pitcher_dr_calc(
                pitcher_pa, pitcher_stats, year_pa, year_totals)
    logger.log('\t\tBegin calculating comparisons scores')
    for season in all_years:
        try:
            year_pa, year_totals = pitcher_year_totals(season, logger)
        except IndexError:
            continue
        season_pitchers = gather_players(season, "pitching", False, logger)
        make_pitcher_comparisons(season_pitchers, season, possible_pitcher_comps,
                                 year_pa, year_totals, logger)
    phase_time = time_converter(time.time() - pc_time)
    logger.log("\t\tTime = " + phase_time)
    driver_logger.log('\t\t\tTime = ' + phase_time)

    # ---- team offense --------------------------------------------------
    print('making team offensive comparisons')
    driver_logger.log('\t\tMaking team offensive comparisons')
    logger.log('\t\tGathering list of possible comps')
    oc_time = time.time()
    logger.log("\tMaking offensive comparisons (overall)")
    possible_offensive_comps = {}
    for pool_year in range(most_recent_year, 1997, -1):
        # NOTE(review): the pool uses hitter_year_totals while the scoring
        # loop below uses offensive_year_totals - confirm this is intended.
        year_pa, year_totals = hitter_year_totals(pool_year, logger)
        for ty_uid in gather_teams(pool_year, logger):
            team_pa, team_stats = get_offensive_stats(ty_uid)
            possible_offensive_comps[ty_uid] = offensive_dr_calc(
                team_pa, team_stats, year_pa, year_totals)
    logger.log('\t\tBegin calculating comparisons scores')
    for season in all_years:
        try:
            year_pa, year_totals = offensive_year_totals(season, logger)
        except IndexError:
            continue
        season_teams = gather_teams(season, logger)
        make_offensive_comparisons(season_teams, possible_offensive_comps,
                                   year_pa, year_totals, logger)
    phase_time = time_converter(time.time() - oc_time)
    logger.log("\t\tTime = " + phase_time)
    driver_logger.log('\t\t\tTime = ' + phase_time)

    # ---- team defense --------------------------------------------------
    print('making team defensive comparisons')
    driver_logger.log('\t\tMaking team defensive comparisons')
    logger.log('\t\tGathering list of possible comps')
    dc_time = time.time()
    logger.log("\tMaking defensive comparisons (overall)")
    possible_defensive_comps = {}
    for pool_year in range(most_recent_year, 1997, -1):
        year_pa, year_totals = defensive_year_totals(pool_year, logger)
        for ty_uid in gather_teams(pool_year, logger):
            team_pa, team_stats = get_defensive_stats(ty_uid)
            possible_defensive_comps[ty_uid] = defensive_dr_calc(
                team_pa, team_stats, year_pa, year_totals)
    logger.log('\t\tBegin calculating comparisons scores')
    for season in all_years:
        try:
            year_pa, year_totals = defensive_year_totals(season, logger)
        except IndexError:
            continue
        season_teams = gather_teams(season, logger)
        make_defensive_comparisons(season_teams, possible_defensive_comps,
                                   year_pa, year_totals, logger)
    phase_time = time_converter(time.time() - dc_time)
    logger.log("\t\tTime = " + phase_time)
    driver_logger.log('\t\t\tTime = ' + phase_time)

    overall_time = time_converter(time.time() - start_time)
    logger.log("Done making comparisons: time = " + overall_time + '\n\n')
    driver_logger.log("\tDone making comparisons: time = " + overall_time)
コード例 #28
0
def get_year_data(year):
    """Download and store league-wide batting, pitching and fielding data for ``year``.

    Resets the module-level ``pages`` and ``strings`` dicts, inserts ``year``
    into the ``years`` table when it is not there yet, calls
    ``write_opening_day``, then fetches one page per stat category on a
    three-worker thread pool. Each page is passed to ``assemble_stats`` and
    the per-category entry of ``strings`` is written with ``write_to_db`` on
    a second pool.

    :param year: season to process
    """
    global pages, strings
    driver_logger.log('\tGathering year data')
    print("Gathering year data")
    start_time = time.time()
    pages = {}
    strings = {}
    timestamp = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    logger.log('Beginning year_data download for ' + str(year) +
               ' || Timestamp: ' + timestamp)

    # For every category: key as passed to assemble_stats -> short name.
    stat_list = {
        "batting": {
            'PA': 'pa',
            'AB': 'ab',
            'R': 'r',
            'H': 'h',
            '2B': '2b',
            '3B': '3b',
            'HR': 'hr',
            'RBI': 'rbi',
            'SB': 'sb',
            'BB': 'bb',
            'SO': 'so',
            'batting_avg': 'ba',
            'onbase_perc': 'obp',
            'slugging_perc': 'slg',
            'onbase_plus_slugging': 'ops',
        },
        "pitching": {
            'earned_run_avg': 'era',
            'SV': 'sv',
            'IP': 'ip',
            'ER': 'er',
            'whip': 'whip',
            'strikeouts_per_nine': 'k_9',
            'strikeouts_per_base_on_balls': 'k_bb',
        },
        "fielding": {
            'E_def': 'e',
            'fielding_perc': 'f_percent',
        },
    }

    year_text = str(year)
    db = DatabaseConnection(sandbox_mode)
    existing_rows = db.read('select * from years where year = ' + year_text + ';')
    if len(existing_rows) == 0:
        # The index on `year` is dropped around the insert and rebuilt after it.
        db.write('alter table years drop index year;')
        db.write('insert into years (year) values (' + year_text + ');')
        db.write('alter table years add index(year);')
    db.close()
    write_opening_day(year)

    download_start = time.time()
    logger.log("making HTTP requests for year data")
    with ThreadPoolExecutor(3) as fetch_pool:
        for category in stat_list:
            fetch_pool.submit(load_url, year, category)
    logger.log("\tdone making HTTP requests: time = " +
               time_converter(time.time() - download_start))

    for category, columns in stat_list.items():
        assemble_stats(category, columns, pages[category])

    write_start = time.time()
    logger.log("writing to database")
    with ThreadPoolExecutor(3) as write_pool:
        for category in stat_list:
            write_pool.submit(write_to_db, year, strings[category], category)
    logger.log("\tdone writing to database: time = " +
               time_converter(time.time() - write_start))

    total_time = time_converter(time.time() - start_time)
    logger.log('year_data download completed: time = ' + total_time + '\n\n')
    driver_logger.log('\t\tTime = ' + total_time)
コード例 #29
0
def fielding_constructor(year):
    """Collect the fielders listed for ``year`` and process their pages and stats.

    The league "standard-fielding" page on baseball-reference.com is split
    into ``<tr`` fragments and every player row is recorded in the
    module-level ``data`` dict as ``data[player_id][team][index]`` with the
    keys ``'row'`` (the fragment split on ``data-stat``) and
    ``'temp_player'``. ``load_url`` is then called for each player,
    ``condense_pages`` and ``write_player_attributes_to_db`` are run,
    ``intermediate`` is submitted for every stored row, and finally
    ``write_teams_and_stats`` is called per player/team with that player's
    entry from ``catcher_defense`` (or ``{}`` when there is none).

    :param year: season to process
    """
    print('Downloading fielder images and attributes')
    driver_logger.log("\tDownloading fielder images and attributes")
    start_time = time.time()
    global data
    data = {}
    catcher_info = catcher_defense(year, logger)
    logger.log("Downloading fielder " + str(year) + " data || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tAssembling list of players")
    # Fetch the page and cut the first sortable stats table into '<tr' fragments.
    table = str(BeautifulSoup(urlopen("https://www.baseball-reference.com/leagues/MLB/" + str(year)
                                      + "-standard-fielding.shtml"), 'html.parser')).\
        split('<table class="sortable stats_table" id')[1].split('<tbody>')[1].split('</tbody>')[0].split('<tr')
    for row in table:
        # Only fragments that carry both a player cell and a player id are player rows.
        if 'data-stat="player" csk="' in row and 'data-append-csv="' in row:
            # NOTE(review): "\'" is the same string as "'", so this replace is
            # a no-op; if SQL escaping was intended, confirm and fix separately.
            player_id = row.split('data-append-csv="')[1].split(
                '"')[0].replace("'", "\'")
            try:
                team = translate_team_id(
                    row.split('a href="/teams/')[1].split('/')[0], year)
                # A four-character id is replaced by anomaly_team(year).
                if len(team) == 4:
                    team = anomaly_team(year)
            except IndexError:
                # No team link in the row: file it under 'TOT'.
                team = 'TOT'
            if player_id not in data:
                data[player_id] = {}
            if team not in data[player_id]:
                this_index = 1
                data[player_id][team] = {}
            else:
                if team != 'TOT':
                    # Another row for the same player and team: next index.
                    this_index = len(data[player_id][team]) + 1
                else:
                    # Only the first 'TOT' row per player is kept.
                    continue
            # 'ata-stat="player" csk="' matches inside 'data-stat="player" csk="'.
            data[player_id][team][this_index] = {
                'row':
                row.split('data-stat'),
                'temp_player':
                row.split('ata-stat="player" csk="')[1].split('" >')[0]
            }
    logger.log("\t\tDone assembling list of players")
    bulk_time = time.time()
    logger.log(
        "\tParsing player pages, downloading images, and extracting player attributes"
    )
    # temp_pages is only read here; presumably load_url fills it and
    # condense_pages drains it - TODO confirm.
    global temp_pages
    with ThreadPoolExecutor(os.cpu_count()) as executor:
        for player_id, dictionary in data.items():
            # Condense whenever a batch of cpu_count() pages has accumulated.
            if len(temp_pages) == os.cpu_count():
                condense_pages()
            # NOTE(review): load_url(player_id) is called right here on the
            # calling thread and its return value is what gets submitted, so
            # the pool does not run load_url. Passing (load_url, player_id)
            # would make the downloads concurrent, but the temp_pages batching
            # above would need checking for races first.
            executor.submit(load_url(player_id))
    # Handle whatever is left after the last batch.
    condense_pages()
    write_player_attributes_to_db()
    with ThreadPoolExecutor(os.cpu_count()) as executor3:
        for player_id, dictionary in data.items():
            for team, dictionary2 in dictionary.items():
                for index, dictionary3 in dictionary2.items():
                    executor3.submit(intermediate, team, index, player_id)
    for player_id, dictionary in data.items():
        for team, dictionary2 in dictionary.items():
            try:
                write_teams_and_stats(player_id, dictionary2, team, year,
                                      catcher_info[player_id])
            except KeyError:
                # NOTE(review): a KeyError raised inside write_teams_and_stats
                # would also land here and trigger a second call.
                write_teams_and_stats(player_id, dictionary2, team, year, {})
    logger.log("\t\tTime = " + time_converter(time.time() - bulk_time))
    total_time = time_converter(time.time() - start_time)
    logger.log("Done downloading player images and attributes: time = " +
               total_time + '\n\n')
    driver_logger.log("\t\tTime = " + total_time)
コード例 #30
0
ファイル: rank_driver.py プロジェクト: Engy-22/BaseballSync
def rank_driver(year):
    """Compute per-season and all-time team rankings up to ``year``.

    For every season from ``year`` down to ``get_oldest_year()`` (inclusive)
    ``team_ranker_year`` is called, the standard deviation of element ``[1]``
    of each returned entry is stored per season, and the World Series winner
    is looked up. ``team_ranker_ovr`` is then called three times (offense,
    defense, overall) with those per-season results, the all-time runs per
    game and the mean of the per-season deviations.

    :param year: most recent season to include
    """
    print("\n\ncalculating team ranks (year)")
    driver_logger.log("\tBeginning rank driver")
    start_time = time.time()
    logger.log("Beginning rank driver || Timestamp: " +
               datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    logger.log("\tCalculating team ranks (year)")
    # Per-season results keyed by the season as an int.
    runs = {}
    allowed = {}
    difference = {}
    # Per-season standard deviations keyed by the season as a string.
    standard_deviation_for = {}
    standard_deviation_against = {}
    standard_deviation_ovr = {}
    ws_winners = {}
    driver_logger.log("\t\tCalculating team ranks (year)")
    # Walk backwards from `year` to the oldest season, inclusive.
    for data_year in range(year, get_oldest_year() - 1, -1):
        runs[data_year], allowed[data_year], difference[
            data_year] = team_ranker_year(data_year)
        # Each entry is indexed with [1] for its numeric value; presumably the
        # entries are (team, value) pairs - TODO confirm against team_ranker_year.
        standard_deviation_for[str(data_year)] = stdev(
            [team_runs_for[1] for team_runs_for in runs[data_year]])
        standard_deviation_against[str(data_year)] = stdev(
            [team_runs_against[1] for team_runs_against in allowed[data_year]])
        standard_deviation_ovr[str(data_year)] = stdev(
            [team_runs_diff[1] for team_runs_diff in difference[data_year]])
        ws_winners[data_year] = get_ws_winner(data_year)
    total_time = time_converter(time.time() - start_time)
    logger.log("\t\tTime = " + total_time)
    driver_logger.log("\t\t\tTime = " + total_time)
    second_time = time.time()
    driver_logger.log("\t\tCalculating team ranks (overall)")
    logger.log("\tCalculating team ranks (overall)")
    print("calculating team ranks (overall)")
    # NOTE(review): total_list is filled below but not read anywhere in the
    # visible part of this function.
    total_list = []
    years = [value for key, value in runs.items()]
    for ent in years:
        for team_total in ent:
            total_list.append(team_total[1])
    # Mean of the per-season standard deviations for each category.
    average_deviation_for = mean(
        [value for key, value in standard_deviation_for.items()])
    average_deviation_against = mean(
        [value for key, value in standard_deviation_against.items()])
    average_deviation_diff = mean(
        [value for key, value in standard_deviation_ovr.items()])
    all_time_rpg = get_all_time_rpg()
    team_ranker_ovr(runs, True, "offRank_ovr", all_time_rpg,
                    standard_deviation_for, average_deviation_for)
    team_ranker_ovr(allowed, False, "defRank_ovr", all_time_rpg,
                    standard_deviation_against, average_deviation_against)
    # Only the overall ranking also receives the World Series winners.
    team_ranker_ovr(difference, True, "ovrRank_ovr", all_time_rpg,
                    standard_deviation_ovr, average_deviation_diff, ws_winners)
    second = time_converter(time.time() - second_time)
    logger.log("\t\tTime = " + second)
    driver_logger.log("\t\t\tTime = " + second)
    total_time = time_converter(time.time() - start_time)
    logger.log("Rank driver complete: time = " + total_time + '\n\n')
    driver_logger.log("\t\tRank driver time = " + total_time)