Example #1
def populate_yahoo():
    import pandas as pd
    from general import postgres
    from munging import player_names

    bbdb = postgres.connect_to_bbdb()

    sql = 'SELECT fg_id, name FROM reference.player_names WHERE yahoo_id = \'\''
    missing_yahoo = pd.read_sql_query(sql, con=bbdb)

    sql = 'SELECT yahoo_id, yahoo_name FROM reference.player_pool_yahoo'
    yahoo_names = pd.read_sql_query(sql, con=bbdb)

    #yahoo_search = missing_yahoo.loc[1,:].to_list()
    for i in range(len(missing_yahoo)):
        yahoo_search = missing_yahoo.loc[i, :].to_list()
        fg_id = yahoo_search[0]
        fg_name = yahoo_search[1]

        print('Looking for ' + fg_name)

        if fg_name in yahoo_names['yahoo_name'].to_list():
            print('Think we found a match for ' + fg_name)
            yahoo_id = yahoo_names[yahoo_names['yahoo_name'] ==
                                   fg_name]['yahoo_id'].values[0]
            print('FG info:')
            print(player_names.get_fg_info(fg_id))
            print('Yahoo info:')
            print(player_names.get_yahoo_info(yahoo_id))

            update = input('Go ahead and update player names (Y/N): ')
            if update == 'Y':
                sql_update = 'UPDATE reference.player_names SET yahoo_id = \''+\
                    str(yahoo_id)+\
                    '\' WHERE fg_id=\''+\
                    str(fg_id)+'\''
                print(sql_update)
                bbdb.execute(sql_update)
    player_names.push_player_names_to_gs()
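
The UPDATE above is built by string concatenation; a parameterized sketch, assuming bbdb is a SQLAlchemy connection (the set_yahoo_id helper name is hypothetical):

from sqlalchemy import text

def set_yahoo_id(bbdb, fg_id, yahoo_id):
    # Parameterized variant of the concatenated UPDATE used above
    stmt = text('UPDATE reference.player_names SET yahoo_id = :yahoo_id '
                'WHERE fg_id = :fg_id')
    bbdb.execute(stmt, {'yahoo_id': str(yahoo_id), 'fg_id': str(fg_id)})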
Example #2
def get_legacy_ownership():
    import pandas as pd
    from general import postgres

    bbdb = postgres.connect_to_bbdb()
    ff_ownership = pd.read_sql('SELECT * FROM rosters.legacy', bbdb)
    return ff_ownership
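
Every example calls postgres.connect_to_bbdb(), which is not shown in this listing. A minimal sketch of what it presumably does, assuming SQLAlchemy and a hypothetical local DSN:

from sqlalchemy import create_engine

def connect_to_bbdb():
    # DSN is a placeholder; user, host, port, and database name are assumptions
    engine = create_engine('postgresql://bbuser@localhost:5432/bbdb')
    return engine.connect()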
Example #3
def update_relievers_last14():
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import postgres
    from general import gs

    bbdb = postgres.connect_to_bbdb()
    relievers_last14 = pd.read_sql('''
        SELECT 
            r.name, r.team, ff_elig.ff_elig, r.g, r.ip, r.sv,
            r.hld, r.gmli, r.wpa, r.era, r.kwera, 
            r.xfip, r.siera, r.xera, r.csw_pct, r.k_pct, 
            r.bb_pct, r.swstr_pct, r.vfa, r.babip, r.lob_pct, 
            r.hr_fb, r.asof_date, r.fg_id,
            rosters_sos."Team" as sos_team,
            rosters_legacy."Team" as legacy_team
        FROM tracking.relievers_last14 r
        LEFT JOIN reference.player_pool_ff ff_elig ON r.fg_id=ff_elig.fg_id
        LEFT JOIN rosters.sos rosters_sos ON r.fg_id=rosters_sos.fg_id
        LEFT JOIN rosters.legacy rosters_legacy ON r.fg_id=rosters_legacy.fg_id
        WHERE ff_elig IN ('P', 'RP', 'SP') OR ff_elig IS NULL
        ORDER BY r.wpa DESC
        ''',
                                   con=bbdb)
    gc = gspread.service_account(filename='../bb-2021-2b810d2e3d25.json')
    bb2021 = gc.open("BB 2021 InSeason")
    sheettitle = "Relievers - Last 14"
    bb2021.values_clear(sheettitle + "!A:Z")
    gsdf.set_with_dataframe(bb2021.worksheet(sheettitle), relievers_last14)
    gs.format_gsheet(bb2021.worksheet(sheettitle))
Example #4
def scrape_savant():

    from datetime import date
    import os
    import pandas as pd
    from general import selenium_utilities
    from general import postgres
    from munging import player_names

    driver = selenium_utilities.start_driver()
    draft_url = "https://baseballsavant.mlb.com/leaderboard/custom?year=2021&type=batter&filter=&sort=4&sortDir=desc&min=10&selections=b_total_pa,xba,xslg,woba,xwoba,xobp,xiso,wobacon,xwobacon,exit_velocity_avg,launch_angle_avg,barrel_batted_rate,hard_hit_percent,sprint_speed,&chart=false&x=xba&y=xba&r=no&chartType=beeswarm"
    driver.get(draft_url)
    print('Arrived at ' + driver.current_url)

    input_dl = driver.find_element_by_id('btnCSV')
    input_dl.click()

    basepath = "/Users/andrewfelton/Documents/bb/bb-2021"
    dl_file = "/Users/andrewfelton/Downloads/docker/stats.csv"

    today = date.today().strftime("%Y%m%d")
    new_file = basepath + "/data/savant/hitter_stats_" + today + ".csv"
    stream_command = os.popen('mv ' + dl_file + ' ' + new_file)
    mv_file = stream_command.read()

    # create the soft link
    ln_file = basepath + "/data/savant/hitter_stats.csv"
    command_ln = os.popen('ln -sf ' + new_file + ' ' + ln_file)

    driver.close()
    print("Finished scraping " + ln_file)

    savant = pd.read_csv(ln_file)
    savant.insert(0, 'asof_date', date.today().strftime('%Y-%m-%d'))

    # Merge in the player names and FG IDs
    savant.rename(columns={'player_id': 'mlb_id'}, inplace=True)
    savant['mlb_id'] = savant['mlb_id'].apply(str)
    names = player_names.get_player_names()
    savant = savant.merge(right=names[['mlb_id', 'fg_id']],
                          how='left',
                          on='mlb_id')

    #fg_ids = savant[['fg_id']].astype(str).values
    #put_missing_in_GS(id_list=pd.DataFrame(fg_ids, columns=['fg_id']), type='fg_id')

    savant = savant[[
        'asof_date', 'fg_id', 'b_total_pa', 'xba', 'xslg', 'woba', 'xwoba',
        'xobp', 'xiso', 'wobacon', 'xwobacon', 'exit_velocity_avg',
        'launch_angle_avg', 'barrel_batted_rate', 'hard_hit_percent',
        'sprint_speed'
    ]]

    schema = 'tracking'
    tablename = 'savant'
    bbdb = postgres.connect_to_bbdb()
    savant.to_sql(tablename, bbdb, schema=schema, if_exists='replace')
Example #5
def scrape_daily_roster(scoring_period, team_id):
    import requests
    import pandas as pd
    import datetime
    import json
    from general import postgres
    bbdb = postgres.connect_to_bbdb()

    sql_delete_rosters_this_period = '''
        DELETE FROM sos_2021.rosters 
        WHERE scoring_period = '{}' AND team_id = '{}'
    '''.format(scoring_period, team_id)
    bbdb.execute(sql_delete_rosters_this_period)

    league_id = '23172'

    query = {
        'sport': 'MLB',
        'league_id': league_id,
        'team_id': team_id,
        'season': '2021',
        'scoring_period': scoring_period
    }
    response = requests.get('https://www.fleaflicker.com/api/FetchRoster',
                            params=query)
    response_json = response.json()

    roster = []
    for group in response_json['groups']:
        for slot in group['slots']:
            player = [slot['position']['label']]  # The position
            # Need to check if there is a player or if the slot is empty
            if 'leaguePlayer' in slot.keys():
                player.append(str(slot['leaguePlayer']['proPlayer']['id']))
                player.append(slot['leaguePlayer']['proPlayer']['nameFull'])
            else:
                player.append('NULL')
                player.append('NULL')
            #print(player)
            roster.append(player)
    #print(roster)

    roster = pd.DataFrame(roster, columns=['position', 'ff_id', 'ff_name'])
    roster['league_id'] = query['league_id']
    roster['team_id'] = query['team_id']
    roster['scoring_period'] = query['scoring_period']
    epochtime = int(response_json['lineupPeriod']['low']['startEpochMilli'])
    scoring_date = datetime.datetime.fromtimestamp(epochtime / 1000.0).date()
    roster['scoring_date'] = scoring_date

    roster.to_sql(name='rosters',
                  con=bbdb,
                  schema='sos_2021',
                  if_exists='append')
    print('Uploaded roster for team_id ' + str(query['team_id']) +
          ' for date ' + str(scoring_date))
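
A hypothetical invocation of the function above, backfilling one team across a few scoring periods (the team ID is assumed):

for period in range(25, 31):
    scrape_daily_roster(scoring_period=str(period), team_id='1234')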
Example #6
def get_eligibilities(league):
    import sys
    sys.path.append('python/general')
    from general import postgres
    import pandas as pd

    bbdb = postgres.connect_to_bbdb()
    query = ('SELECT fg_id, elig ' 'FROM reference.player_pool_ff ')
    df = pd.read_sql_query(query, bbdb)
    return df
Example #7
def get_player_names():
    import pandas as pd
    from general import postgres

    bbdb = postgres.connect_to_bbdb()
    names = pd.read_sql_query(sql='SELECT * FROM REFERENCE.PLAYER_NAMES',
                              con=bbdb)
    for col in ['otto_id', 'yahoo_id', 'bp_id', 'espn_id', 'ff_id', 'fg_id']:
        names[col] = names[col].astype('str').replace(r'\.0', '', regex=True)
    return names
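
Typical usage of the helper above; the ID columns come back as strings with any float-style '.0' suffix stripped (the lookup name is hypothetical):

names = get_player_names()
soto = names[names['name'] == 'Juan Soto']
print(soto[['name', 'fg_id', 'yahoo_id', 'bbref_id']])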
Example #8
def create_combined_valuations(league):
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf
    import gspread_formatting as gsfmt

    from general import postgres
    from munging import player_names
    from general import gs

    assert league.league_name in ['SoS', 'Legacy']
    #league = 'SoS'
    #league = 'Legacy'

    bbdb = postgres.connect_to_bbdb()
    names = player_names.get_player_names()
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    #bb2021 = gc.open("BB 2021 " + league.league_name)
    bb2021 = gc.open("BB 2021 InSeason")

    combined_hitters = create_combined_hitter_valuations(league)
    combined_pitchers = create_combined_pitcher_valuations(league)

    hitter_projections = bb2021.worksheet('Hitter Projections - ' +
                                          league.league_name)
    bb2021.values_clear(hitter_projections.title + "!A:Z")
    gsdf.set_with_dataframe(hitter_projections, combined_hitters)
    # NOTE: assumes a format_gs helper module is importable in this codebase
    format_gs.format_gs_all(league=league.league_name, ls=league, type='hitting')

    pitcher_projections = bb2021.worksheet('Pitcher Projections - ' +
                                           league.league_name)
    bb2021.values_clear(pitcher_projections.title + "!A:Z")
    gsdf.set_with_dataframe(pitcher_projections, combined_pitchers)
    format_gs.format_gs_all(league=league.league_name,
                            ls=league,
                            type='pitching')

    combined = pd.concat([
        combined_hitters[['name', 'fg_id', 'type', 'zar', 'value']],
        combined_pitchers[['name', 'fg_id', 'type', 'zar', 'value']]
    ])
    combined = combined.sort_values(by='value', ascending=False)

    gs_combined = bb2021.worksheet('Combined Z')
    gsdf.set_with_dataframe(gs_combined, combined)

    gsfmt.format_cell_range(
        gs_combined, 'D:E',
        gsfmt.CellFormat(
            numberFormat=gsfmt.NumberFormat(type='NUMBER', pattern='0.0')))
Example #9
def append_new_fg_to_names():
    import sys
    sys.path.append('python/general')
    import postgres
    sys.path.append('python/munging')
    import player_names
    import pandas as pd

    bbdb = postgres.connect_to_bbdb()
    ff_sql = 'SELECT name, playerid as fg_id, pa FROM proj.fg_dc_batters_raw WHERE playerid NOT IN (SELECT fg_id FROM reference.player_names) ORDER BY pa DESC'
    ff_info = pd.read_sql_query(ff_sql, con=bbdb)
    print('Find matches for this player:')
    print(ff_info)
Example #10
def inseason_standings_sos():
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import postgres

    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    bb2021 = gc.open("BB 2021 InSeason")
    bbdb = postgres.connect_to_bbdb()

    # Update standings
    ff_standings = pd.read_sql_query('SELECT * FROM tracking.standings_sos',
                                     con=bbdb,
                                     parse_dates=['date'])
    sheettitle = "Standings"
    bb2021.values_clear(sheettitle + "!A:Z")
    gsdf.set_with_dataframe(bb2021.worksheet(sheettitle), ff_standings)
Example #11
def push_player_names_to_gs():
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import postgres

    bbdb = postgres.connect_to_bbdb()
    player_names = pd.read_sql(
        'SELECT * FROM reference.player_names ORDER BY name', con=bbdb)
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    sh = gc.open("BB 2021 Name Matching").worksheet('Player Names')
    gsdf.set_with_dataframe(sh, player_names)
    sh.clear_basic_filter()
    sh.set_basic_filter()
Example #12
def create_actuals_hitters(ls, year=2021):
    import pandas as pd
    from general import utilities
    from general import postgres
    from general import classes
    from munging import player_names

    bbdb = postgres.connect_to_bbdb()

    if year == 2021:
        schema = 'tracking'
    else:
        schema = 'reference'

    query = (
        'SELECT year, bbref_id, bat."Tm" as team, bat."PA" as pa, ' +
        'bat."HR" as hr, bat."R" as r, bat."RBI" as rbi, bat."SB" as sb, bat."OBP" as obp, bat."OPS" as ops '
        + 'FROM ' + schema + '.bbref_batting_standard bat WHERE year=' +
        str(year))
    df = pd.read_sql_query(query, bbdb)
    df = df.fillna(value={
        'obp': 0,
        'ops': 0,
        'pa': 0,
        'r': 0,
        'rbi': 0,
        'sb': 0
    })
    for c in ['pa', 'r', 'rbi', 'hr', 'sb']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['obp', 'ops']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    df = df[(df['bbref_id'].notnull()) & (df['bbref_id'] != u'')]

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = df.merge(names[['bbref_id', 'fg_id', 'name']],
                                on='bbref_id',
                                how='left')
    output_stats = utilities.flatten(
        [['fg_id', 'bbref_id', 'name', 'team', 'pa'], [ls.hitting_stats]])
    combined_hitters = combined_hitters[output_stats]
    combined_hitters.drop_duplicates(inplace=True)
    return combined_hitters
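
utilities.flatten is used here and in several examples below but never shown; a minimal sketch consistent with how it is called (mixed strings and nested lists in, a flat list of strings out):

def flatten(nested):
    # Recursively flatten lists while leaving strings intact
    flat = []
    for item in nested:
        if isinstance(item, list):
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat

# flatten([['fg_id', 'name'], [['hr', 'rbi']]]) -> ['fg_id', 'name', 'hr', 'rbi']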
Example #13
def create_actuals_pitchers(ls, year=2021):
    import pandas as pd

    from general import utilities
    from general import postgres
    from munging import player_names

    bbdb = postgres.connect_to_bbdb()
    query = (
        'SELECT pit_std.year, pit_std.bbref_id, pit_std."Tm" as team, pit_std."IP" as ip, pit_start."GS" as gs, pit_start."QS" as qs, pit_std."SO" as so, pit_std."ERA" as era, pit_std."WHIP" as whip, pit_relief."SV" as sv, pit_relief."Hold" as hld FROM '
        + '(SELECT * FROM tracking.bbref_pitching_standard) as pit_std ' +
        'LEFT JOIN (SELECT * FROM tracking.bbref_pitching_starter) as pit_start ON pit_std.bbref_id=pit_start.bbref_id AND pit_std.year=pit_start.year AND pit_std."Tm"=pit_start."Tm" '
        +
        'LEFT JOIN (SELECT * FROM tracking.bbref_pitching_reliever) as pit_relief ON pit_std.bbref_id=pit_relief.bbref_id AND pit_std.year=pit_relief.year AND pit_std."Tm"=pit_relief."Tm" '
        + 'WHERE pit_std.year=' + str(year))
    df = pd.read_sql_query(query, bbdb)
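    # bbref-style IP uses .1/.2 for thirds of an inning; the replacements below
    # convert them to decimal thirds (e.g. '100.1' -> '100.33') before the float cast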
    df['ip'] = df['ip'].str.replace('.1', '.33', regex=False)
    df['ip'] = df['ip'].str.replace('.2', '.67', regex=False)
    df = df.fillna(value={
        'era': 0,
        'whip': 0,
        'gs': 0,
        'qs': 0,
        'sv': 0,
        'hld': 0
    })
    for c in ['gs', 'qs', 'so', 'sv', 'hld']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['ip', 'era', 'whip']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    df['svhld'] = df['sv'] + df['hld']
    df = df[(df['bbref_id'].notnull()) & (df['bbref_id'] != u'')]

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_pitchers = df.merge(names[['bbref_id', 'fg_id', 'name']],
                                 on='bbref_id',
                                 how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'ip'],
                                      [ls.pitching_stats]])
    combined_pitchers = combined_pitchers[output_stats]
    combined_pitchers.drop_duplicates(inplace=True)

    return combined_pitchers
Example #14
def scrape_sfb_names():
    import os
    import pandas as pd
    from general import selenium_utilities
    from general import postgres

    # Download the latest .csv file from smartfantasybaseball.com
    driver = selenium_utilities.start_driver()
    driver.get("https://www.smartfantasybaseball.com/PLAYERIDMAPCSV")

    filename = '/Users/andrewfelton/Downloads/docker/SFBB Player ID Map - PLAYERIDMAP.csv'
    player_names = pd.read_csv(filename)
    os.remove(filename)

    tablename = 'player_names_sfb'
    bbdb = postgres.connect_to_bbdb()
    command = 'TRUNCATE TABLE reference.' + tablename
    bbdb.execute(command)
    player_names.to_sql(tablename,
                        bbdb,
                        schema='reference',
                        if_exists='append')
    player_names['yahoo_id'] = player_names['yahoo_id'].astype(str)
Example #15
def scrape_sfb_names():
    import time
    import os
    import sys
    import pandas as pd
    sys.path.append('python/general')
    import selenium_utilities
    import postgres

    # EXTRACT
    # Download the latest .csv file from smartfantasybaseball.com
    driver = selenium_utilities.start_driver()
    driver.get("https://www.smartfantasybaseball.com/PLAYERIDMAPCSV")
    time.sleep(2)
    filename = '/Users/andrewfelton/Downloads/docker/SFBB Player ID Map - PLAYERIDMAP.csv'
    sfb_names = pd.read_csv(filename, dtype=str)
    os.remove(filename)

    # TRANSFORM
    orignames = list(sfb_names.columns.values)
    colnames = list(sfb_names.columns.values)
    colmap = dict()
    for i in range(0, len(colnames)):
        colnames[i] = colnames[i].lower()
        if colnames[i][-2:] == 'id':
            colnames[i] = colnames[i][:-2] + '_id'
        if colnames[i][:2] == 'id':
            colnames[i] = colnames[i][2:] + '_id'
        if colnames[i][-4:] == 'name':
            colnames[i] = colnames[i][:-4] + '_name'
        colnames[i] = colnames[i].replace('fangraphs', 'fg')
        colmap[orignames[i]] = colnames[i]
    sfb_names = sfb_names.rename(mapper=colmap, axis=1)

    # LOAD
    tablename = 'player_names_sfb'
    bbdb = postgres.connect_to_bbdb()
    command = 'TRUNCATE TABLE reference.' + tablename
    bbdb.execute(command)
    sfb_names.to_sql(tablename, bbdb, schema='reference', if_exists='append')

    return sfb_names
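
The renaming loop above can be spot-checked in isolation; a small stand-alone version of the same rules (the source header names are hypothetical):

def normalize_sfb_column(name):
    # Same transformation as the loop above, for a single column header
    name = name.lower()
    if name[-2:] == 'id':
        name = name[:-2] + '_id'
    if name[:2] == 'id':
        name = name[2:] + '_id'
    if name[-4:] == 'name':
        name = name[:-4] + '_name'
    return name.replace('fangraphs', 'fg')

assert normalize_sfb_column('IDFANGRAPHS') == 'fg_id'
assert normalize_sfb_column('YAHOOID') == 'yahoo_id'
assert normalize_sfb_column('PLAYERNAME') == 'player_name'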
Example #16
def scrape_standings(league):
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    import datetime
    from general import postgres

    assert (league.league_platform == 'fleaflicker')
    league_num = league.league_num

    roster_url = 'https://www.fleaflicker.com/mlb/leagues/' + league_num
    page = requests.get(roster_url)
    soup = BeautifulSoup(page.text, 'html.parser')
    main_div = soup.find('div', id='body-center-main')
    tables = main_div.find('table')
    trows = tables.find_all('tr')
    standings = []
    for trow in trows[2:]:
        standing = []
        tds = trow.find_all('td')
        standing.append(tds[0].text)
        for i in range(3, 15):
            standing.append(tds[i].find('span').text)
        standings.append(standing)
    df_standings = pd.DataFrame(standings,
                                columns=[
                                    'team', 'hr', 'r', 'rbi', 'sb', 'obp',
                                    'ops', 'so', 'sv', 'hld', 'era', 'whip',
                                    'qs'
                                ])

    today = datetime.date.today()
    df_standings['date'] = today
    str_today = str(today)

    bbdb = postgres.connect_to_bbdb()
    df_standings.to_sql(name='standings_sos',
                        con=bbdb,
                        schema='tracking',
                        index=False,
                        if_exists='replace')
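
Several examples take a league object exposing league_name, league_platform, and league_num; the class itself (from general.classes) is not shown. A minimal stand-in for experimentation, with hypothetical values:

class League:
    def __init__(self, league_name, league_platform, league_num):
        self.league_name = league_name
        self.league_platform = league_platform
        self.league_num = league_num

sos = League(league_name='SoS', league_platform='fleaflicker', league_num='23172')
scrape_standings(sos)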
Example #17
def post_sos_d2_drafts(draftnums):
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import postgres

    # Calc avg. and min. values for D2 draft
    bbdb = postgres.connect_to_bbdb()
    query = 'SELECT fg_id, MIN(draft."Pick"::DOUBLE PRECISION) as min_pick, AVG(draft."Pick"::DOUBLE PRECISION) as avg_pick FROM ('
    select_queries = []
    for draftnum in draftnums:
        select_queries.append('SELECT fg_id, cm_mock_' + draftnum +
                              '."Pick" FROM drafts.cm_mock_' + draftnum)
    query = query + ' UNION '.join(
        select_queries) + ') AS draft GROUP BY fg_id'

    df = pd.read_sql_query(query, bbdb)
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    bb2021 = gc.open("BB 2021 SoS")
    sheettitle = "D2 drafts"
    bb2021.values_clear(sheettitle + "!A:Z")
    gsdf.set_with_dataframe(bb2021.worksheet(sheettitle), df)
    print('Updated D2 drafts sheet')
Example #18
def create_last30_hitters(ls):
    import pandas as pd
    from general import utilities
    from general import postgres
    from general import classes
    from munging import player_names

    bbdb = postgres.connect_to_bbdb()

    query = (
        'SELECT bat.fg_id, bat.team, bat.pa, ' +
        'bat.hr, bat.r, bat.rbi, bat.sb, bat.obp, bat.obp+bat.slg as ops ' +
        'FROM tracking.batters_last30 AS bat')
    df = pd.read_sql_query(query, bbdb)
    df = df.fillna(value={
        'obp': 0,
        'ops': 0,
        'pa': 0,
        'r': 0,
        'rbi': 0,
        'sb': 0
    })
    for c in ['pa', 'r', 'rbi', 'hr', 'sb']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['obp', 'ops']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    #df = df[(df['fg_id'].notnull()) & (df['fg_id']!=u'')]

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = df.merge(names[['fg_id', 'name']],
                                on='fg_id',
                                how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'pa'],
                                      [ls.hitting_stats]])
    combined_hitters = combined_hitters[output_stats]
    combined_hitters.drop_duplicates(inplace=True)
    return combined_hitters
Example #19
def pull_player_names_from_gs():
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import postgres

    bbdb = postgres.connect_to_bbdb()
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    sh = gc.open("BB 2021 Name Matching").worksheet('Player Names')
    player_names = gsdf.get_as_dataframe(sh)
    player_names.sort_values(by='name', inplace=True)

    # convert any numeric ids into text
    for col in [
            'fg_id', 'fg_minor_id', 'bbref_id', 'otto_id', 'yahoo_id', 'bp_id',
            'espn_id', 'ff_id', 'mlb_id'
    ]:
        if col in player_names.columns:
            player_names[col] = player_names[col].astype('str').replace(
                r'\.0', '', regex=True)
            player_names[col] = player_names[col].replace('nan', '', regex=True)
            player_names[col] = player_names[col].replace('NaN', '', regex=True)

    command = 'TRUNCATE TABLE reference.player_names'
    bbdb.execute(command)
    player_names.to_sql('player_names',
                        bbdb,
                        schema='reference',
                        if_exists='append',
                        chunksize=1000,
                        method='multi',
                        index=False)
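
One pattern worth noting here (and in several other loaders above): the table is cleared with TRUNCATE and then written with to_sql(..., if_exists='append'), rather than if_exists='replace'. Truncate-plus-append keeps the table's column types, constraints, and permissions as defined in Postgres, whereas replace would drop the table and recreate it from the DataFrame's inferred dtypes.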
Example #20
def scrape_ottoneu_player_pool():
    import os
    import time
    from datetime import date
    import pandas as pd
    from selenium.webdriver.common.action_chains import ActionChains

    from general import selenium_utilities
    from munging import player_names
    from player_names import put_missing_in_GS
    from general import postgres

    driver = selenium_utilities.start_driver(headless=False)
    url = 'http://ottoneu.fangraphs.com/averageValues'
    driver.get(url)
    time.sleep(2)
    print('Arrived at ' + driver.current_url)

    button_csv = driver.find_element_by_xpath(
        '/html/body/main/header/div[2]/a[1]')
    button_csv.click()
    time.sleep(3)

    basepath = "/Users/andrewfelton/Documents/bb/bb-2021"
    dl_file = "/Users/andrewfelton/Downloads/docker/ottoneu_average_values.csv"

    today = date.today().strftime("%Y%m%d")
    new_file = basepath + "/data/ottoneu/ottoneu_average_values_" + today + ".csv"
    stream_command = os.popen('mv ' + dl_file + ' ' + new_file)
    mv_file = stream_command.read()

    # create the soft link
    ln_file = basepath + "/data/ottoneu/ottoneu_average_values.csv"
    command_ln = os.popen('ln -sf ' + new_file + ' ' + ln_file)

    driver.close()
    print("Finished scraping " + ln_file)

    ottoneu_player_pool = pd.read_csv(ln_file)
    ottoneu_player_pool.insert(0, 'asof_date',
                               date.today().strftime('%Y-%m-%d'))
    ottoneu_player_pool.rename(columns={
        'Name': 'name',
        'OttoneuID': 'otto_id',
        'FG MajorLeagueID': 'fg_id',
        'FG MinorLeagueID': 'fg_minor_id'
    },
                               inplace=True)
    for idtype in ['otto_id', 'fg_id', 'fg_minor_id']:
        ottoneu_player_pool[[idtype]] = ottoneu_player_pool[[idtype
                                                             ]].astype(str)

    tablename = "player_pool_ottoneu"
    bbdb = postgres.connect_to_bbdb()

    query_tables = "SELECT * FROM pg_catalog.pg_tables WHERE schemaname='reference';"
    tables_list_result = bbdb.execute(query_tables)
    tables_list = []
    for table in tables_list_result:
        tables_list.append(table[1])

    if (tablename in tables_list):
        command = 'TRUNCATE TABLE reference.' + tablename
        bbdb.execute(command)
    ottoneu_player_pool[[
        'asof_date', 'name', 'otto_id', 'fg_id', 'fg_minor_id'
    ]].to_sql(tablename,
              bbdb,
              schema='reference',
              if_exists='append',
              index=False)

    return ottoneu_player_pool
Example #21
def update_inseason_valuations(league_sos, league_legacy):
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import gs
    from general import utilities
    from general import postgres

    sos_hitters = create_combined_hitter_valuations(league=league_sos) \
        .rename(columns={'zar': 'zar_sos', 'value': 'value_sos', 'value_600': 'value_600_sos'})
    legacy_hitters = create_combined_hitter_valuations(league=league_legacy) \
        .rename(columns={'zar': 'zar_legacy', 'value': 'value_legacy', 'value_600': 'value_600_legacy'})
    legacy_extra_columns = list(
        set(legacy_hitters.columns).difference(sos_hitters.columns))
    legacy_extra_columns = utilities.flatten(['fg_id', legacy_extra_columns])

    columns = [
        'name', 'fg_id', 'type', 'elig', 'pa',
        league_sos.hitting_counting_stats, league_legacy.hitting_counting_stats,
        league_sos.hitting_rate_stats, league_legacy.hitting_rate_stats,
        'value_sos', 'value_600_sos', 'value_legacy', 'value_600_legacy'
    ]
    columns = utilities.flatten(columns)
    combined_hitters = sos_hitters.merge(legacy_hitters[legacy_extra_columns],
                                         on='fg_id')
    combined_hitters.drop_duplicates(subset=['fg_id'], inplace=True)

    # Merge in the ownership
    bbdb = postgres.connect_to_bbdb()
    sos_rosters = pd.read_sql(
        'SELECT fg_id, sos."Team" as sos_team FROM rosters.sos', con=bbdb)
    sos_rosters[['fg_id']] = sos_rosters[['fg_id']].astype(str)
    combined_hitters = combined_hitters.merge(sos_rosters,
                                              how='left',
                                              on='fg_id')

    legacy_rosters = pd.read_sql(
        'SELECT fg_id, legacy."Team" as legacy_team FROM rosters.legacy',
        con=bbdb)
    legacy_rosters[['fg_id']] = legacy_rosters[['fg_id']].astype(str)
    combined_hitters = combined_hitters.merge(legacy_rosters,
                                              how='left',
                                              on='fg_id')
    combined_hitters.drop(combined_hitters[
        (combined_hitters['fg_id'] == '19755')
        & (combined_hitters['legacy_team'] == 'Harper Wallbanger')].index,
                          inplace=True)

    # Pitchers
    sos_pitchers = create_combined_pitcher_valuations(league=league_sos) \
        .rename(columns={'zar': 'zar_sos', 'value': 'value_sos'})
    legacy_pitchers = create_combined_pitcher_valuations(league=league_legacy) \
        .rename(columns={'zar': 'zar_legacy', 'value': 'value_legacy'})
    legacy_extra_columns = list(
        set(legacy_pitchers.columns).difference(sos_pitchers.columns))
    legacy_extra_columns = utilities.flatten(['fg_id', legacy_extra_columns])

    columns = [
        'name', 'fg_id', 'type', 'ip', league_sos.pitching_counting_stats,
        league_legacy.pitching_counting_stats, league_sos.pitching_rate_stats,
        league_legacy.pitching_rate_stats, 'zar_sos', 'value_sos',
        'zar_legacy', 'value_legacy'
    ]
    columns = utilities.flatten(columns)
    combined_pitchers = sos_pitchers.merge(
        legacy_pitchers[legacy_extra_columns], on='fg_id')
    combined_pitchers = combined_pitchers[columns]

    # Merge in CFIP
    bbdb = postgres.connect_to_bbdb()
    cfip = pd.read_sql('SELECT * FROM hist.bp_pitchers_raw', con=bbdb)
    combined_pitchers = combined_pitchers.merge(cfip[['fg_id', 'DRA', 'cFIP']],
                                                how='left',
                                                on='fg_id')

    # Merge in xxxFIP
    bbdb = postgres.connect_to_bbdb()
    cfip = pd.read_sql('SELECT * FROM tracking.xxxfip WHERE fg_id IS NOT NULL',
                       con=bbdb)
    combined_pitchers = combined_pitchers.merge(cfip[['fg_id', 'xxxFIP']],
                                                how='left',
                                                on='fg_id')

    # Merge in the ownership
    combined_pitchers = combined_pitchers.merge(sos_rosters,
                                                how='left',
                                                on='fg_id')
    combined_pitchers = combined_pitchers.merge(legacy_rosters,
                                                how='left',
                                                on='fg_id')
    combined_pitchers.drop(combined_pitchers[
        (combined_pitchers['fg_id'] == '19755')
        & (combined_pitchers['legacy_team'] == 'Florun\'s Team')].index,
                           inplace=True)

    # Update Google Sheets
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    sh = gc.open("BB 2021 InSeason").worksheet('Proj - Hitters')
    gsdf.set_with_dataframe(sh, combined_hitters)
    gs.format_gsheet(sheet=sh)
    sh = gc.open("BB 2021 InSeason").worksheet('Proj - Pitchers')
    gsdf.set_with_dataframe(sh, combined_pitchers)
    gs.format_gsheet(sheet=sh)
Example #22
def find_other_ids_w_yahoo(yahoo_id):
    #yahoo_id = 11702
    import sys
    sys.path.append('python/general')
    import postgres
    sys.path.append('python/munging')
    import player_names
    import pandas as pd

    bbdb = postgres.connect_to_bbdb()
    yahoo_sql = 'SELECT yahoo_id, yahoo_name, yahoo_team, yahoo_elig, fg_id FROM REFERENCE.player_pool_yahoo WHERE yahoo_id=\'' + str(
        yahoo_id) + '\''
    yahoo_info = pd.read_sql_query(yahoo_sql, con=bbdb)
    #print('Find matches for this player:')
    #print(yahoo_info)

    if len(yahoo_info) == 0:
        print(
            'This yahoo_id is not in the Yahoo player pool.  Please rerun the player pool generator'
        )
        return False
    else:
        yahoo_name = yahoo_info['yahoo_name'].to_list()[0]
        print('Here is the Yahoo player pool info available on ' + yahoo_name +
              ':')
        print(yahoo_info)

    names = player_names.get_player_names()

    # If it's already in the list of player names:
    if yahoo_name in names['name'].to_list():
        matches = names[names['name'] == yahoo_name]
        if len(matches) == 1:
            print('Found a match!')
            print('FG info:')
            fg_id = matches['fg_id'].to_list()[0]
            fg_info = get_fg_info(fg_id)
            print(fg_info)
            perform_merge = input(
                'Do you want to merge in the Yahoo ID into the existing match? (Y/N): '
            )
            if perform_merge == 'Y':
                sql_update = 'UPDATE reference.player_names SET yahoo_id = \''+\
                    str(yahoo_id)+\
                    '\' WHERE fg_id=\''+\
                    str(fg_id)+'\''
                print(sql_update)
                bbdb.execute(sql_update)
                player_names.push_player_names_to_gs()
            else:
                print('OK, won\'t update')
        elif len(matches) > 1:
            print(
                'There is more than one match.  Please manually update.  List of matches:'
            )
            print(matches)
    else:
        # If it's not already in the list of player names, see if there is a match in the raw FG data
        yahoo_sql = \
            'SELECT name, fg_id FROM '+\
            '(SELECT "Name" as name, playerid as fg_id from proj.fg_dc_batters_raw '+\
            'UNION '+\
            'SELECT "Name" as name, playerid as fg_id from proj.fg_dc_pitchers_raw '+\
            ') fg_raw_proj_union '+\
            'WHERE fg_id NOT IN (SELECT fg_id FROM reference.player_names) ORDER BY name'
        yahoo_info = pd.read_sql_query(yahoo_sql, con=bbdb)
        if yahoo_name in yahoo_info['name'].to_list():
            matches = yahoo_info[yahoo_info['name'] == yahoo_name]
            if len(matches) == 1:
                print('Found a match!')
                print('FG info:')
                fg_id = matches['fg_id'].to_list()[0]
                fg_info = get_fg_info(fg_id)
                print(fg_info)
                perform_append = input(
                    'Do you want to append this to the list of player names?')
                if perform_append == 'Y':
                    sql_append_new_name = \
                        'INSERT INTO reference.player_names (name, fg_id, yahoo_id) '+\
                        'VALUES ('+\
                        '\'' + fg_info[1] + '\', \'' + str(fg_info[0]) + '\', \'' + str(yahoo_id) + '\'' +\
                        ')'
                    print(sql_append_new_name)
                    bbdb.execute(sql_append_new_name)
                    player_names.push_player_names_to_gs()
                else:
                    print('OK, won\'t update')
            else:
                print('Cannot find an exact name match in the FG projections')
Example #23
def scrape_yahoo_player_pool():
    # This loops through the Yahoo player pages and saves the player name, id, and eligibility to the database
    from bs4 import BeautifulSoup
    import pandas as pd
    import requests
    import time
    import unidecode

    from munging import player_names
    from general import postgres


    # EXTRACT
    pitcher_base_url = 'https://baseball.fantasysports.yahoo.com/b1/26574/players?status=ALL&pos=P&cut_type=33&stat1=S_S_2021&myteam=0&sort=R_PO&sdir=1&count='
    hitter_base_url = 'https://baseball.fantasysports.yahoo.com/b1/26574/players?status=ALL&pos=B&cut_type=33&stat1=S_S_2021&myteam=0&sort=R_PO&sdir=1&count='
    players = []
    for baseurl in [hitter_base_url, pitcher_base_url]:
        for i in range(0, 401, 25):
            url = baseurl + str(i)
            page = requests.get(url)
            print('Got '+url)
            time.sleep(1)
            soup = BeautifulSoup(page.text, 'html.parser')
            table = soup.find('div', {'id':'players-table'}).find('table')

            for trow in table.find('tbody').find_all('tr'):
                player_div = trow.find('div', {'class':'ysf-player-name'})
                player_name = unidecode.unidecode(player_div.find('a').text)
                player_url = player_div.find('a')['href']
                player_id = player_url.split('/')[-1].split('-')[-1]
                player_team_elig = player_div.find('span', {'class':'Fz-xxs'}).text.split('-')
                player_team = player_team_elig[0].strip()
                player_elig = player_team_elig[1].strip()
                players.append([player_id, player_name, player_url, player_team, player_elig])

    df_players = pd.DataFrame(players, columns=['yahoo_id', 'yahoo_name', 'yahoo_url', 'yahoo_team', 'yahoo_elig'])

    # TRANSFORM
    def combine_eligibilities(row):
        yahoo_elig_list = row['yahoo_elig'].split(',')
        eligibilities = []

        # Utility/DH-only
        if yahoo_elig_list == ['Util']:
            eligibilities.append('UT')

        # Infielders
        for pos in ['C', '1B', '2B', 'SS', '3B']:
            if pos in yahoo_elig_list:
                eligibilities.append(pos)
        if '2B' in eligibilities or 'SS' in eligibilities:
            eligibilities.append('MI')
        if '1B' in eligibilities or '3B' in eligibilities:
            eligibilities.append('CI')
        if 'MI' in eligibilities or 'CI' in eligibilities:
            eligibilities.append('IF')

        # Outfielders
        for pos in ['OF', 'RF', 'LF', 'CF']:
            if pos in yahoo_elig_list and 'OF' not in eligibilities:
                eligibilities.append('OF')

        # Pitchers
        for pos in ['SP', 'RP']:
            if pos in yahoo_elig_list:
                eligibilities.append(pos)

        #print(eligibilities)
        # Concatenate into a string and return
        elig = ' '.join(eligibilities).strip()
        #print(elig)
        if elig == '':
            elig = 'UT'
        return elig

    df_players['elig'] = df_players.apply(lambda row: combine_eligibilities(row), axis=1)

    names = player_names.get_player_names()
    df_players = df_players.merge(right=names[['yahoo_id', 'fg_id']], how='left', on='yahoo_id')

    # LOAD    
    bbdb = postgres.connect_to_bbdb()
    df_players.to_sql('player_pool_yahoo', con=bbdb, schema='reference', if_exists='replace', chunksize=1000, method='multi', index=False)
    print('Uploaded Yahoo player pool')

    return df_players
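
If combine_eligibilities were hoisted to module level, its behavior on a hypothetical row would look like this:

import pandas as pd

row = pd.Series({'yahoo_elig': '2B,SS'})
# combine_eligibilities(row) -> '2B SS MI IF'
# ('MI' and 'IF' are derived from the 2B/SS eligibility)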
Example #24
def scrape_yahoo_roster(league_num='26574'):
    print('\n--------------------------\nScraping Yahoo rosters:\n')

    from datetime import date
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import postgres
    from general import selenium_utilities
    from munging import player_names

    league_url = 'https://baseball.fantasysports.yahoo.com/b1/' + league_num + '/startingrosters'
    print('Scraping from '+league_url)
    page = requests.get(league_url)
    bs_rosters = BeautifulSoup(page.text, 'html.parser')
    main_div = bs_rosters.find('div', id='yspmaincontent')
    tables = main_div.find_all('div', {'class':'Grid-u-1-2 Pend-xl'})

    rosters = []
    for table in tables:
        #roster = []
        owner_id = table.find('p').find('a')['href'].split('/')[-1]
        owner = table.find('p').find('a').text
        # print('Scraping ' + owner)
        player_rows = table.find('table').find('tbody').find_all('tr')
        for player_row in player_rows:
            tds = player_row.find_all('td')
            td_pos = tds[0]
            pos = td_pos.text
            td_player = tds[1]
            info_player = td_player.find('div', {'class':'ysf-player-name'})
            if info_player.find('div', {'class':'emptyplayer'}) is not None:
                rosters.append([owner_id, owner, pos, 'empty', 'empty'])
            else:
                player = info_player.find('a')
                #print(player)
                playerid = str(player['href'].split('/')[-1])
                playername = player.text
                rosters.append([owner_id, owner, pos, playerid, playername])

    rosters = pd.DataFrame(rosters, columns=['owner_id', 'Team', 'pos', 'yahoo_id', 'name'])
    #player_names.put_missing_in_GS(id_list=rosters[rosters['yahoo_id']!='empty'], type='yahoo_id')


    names = player_names.get_player_names()
    rosters = rosters.merge(
        names[['yahoo_id', 'fg_id', 'name']],
        on='yahoo_id',
        how='left'
    )
    today = date.today().strftime("%Y%m%d")
    rosters['date'] = today
    rosters = rosters[['date', 'owner_id', 'Team', 'pos', 'fg_id', 'yahoo_id']]

    missing_fg_id = rosters[rosters['fg_id'].isna()]
    if len(missing_fg_id)>0:
        for player in missing_fg_id.values.tolist():
            if player[3] != 'empty': # Don't flag if it's just an empty position slot
                print('\nMissing info on:')
                print(player)
                yahoo_match = player_names.find_other_ids_w_yahoo(player[5])
        player_names.push_player_names_to_gs()
        print('Updated Google Sheets')



    today = date.today().strftime("%Y%m%d")
    basename = "/Users/andrewfelton/Documents/bb/bb-2021/data/yahoo/rosters"
    new_file = basename + "_" + today + ".csv"
    rosters.to_csv(new_file)

    bbdb = postgres.connect_to_bbdb()
    rosters.to_sql('legacy', con=bbdb, schema='rosters', if_exists='replace', index=False)
    print('Uploaded to database')
Example #25
def update_ff_rosters():
    import pandas as pd
    import requests
    import json
    import datetime
    import time

    from general import postgres

    bbdb = postgres.connect_to_bbdb()

    league_id = '23172'

    # Get the list of teams
    query = {
        'sport': 'MLB',
        'league_id': league_id,
        'season': '2021',
        'scoring_period': '1'
    }
    response = requests.get(
        'https://www.fleaflicker.com/api/FetchLeagueRosters', params=query)
    response_json = response.json()
    #with open('fleaflicker_api_test.json', 'w') as f:
    #    json.dump(response_json, f, indent=4)

    teams = []
    for roster in response_json['rosters']:
        print(roster['team']['name'])
        team = [roster['team']['name'], roster['team']['id']]
        teams.append(team)

    teams = pd.DataFrame(teams, columns=['team_name', 'team_id'])
    teams['league_id'] = query['league_id']
    #print(teams)

    teams.to_sql(name='teams',
                 con=bbdb,
                 schema='sos_2021',
                 if_exists='replace')

    # Get the list of eligible scoring periods from one team
    teams = pd.read_sql('SELECT * FROM sos_2021.teams LIMIT 1', con=bbdb)
    league_id = teams.loc[0, 'league_id']
    team_id = teams.loc[0, 'team_id']

    query = {
        'sport': 'MLB',
        'league_id': league_id,
        'team_id': team_id,
        'season': '2021',
        'scoring_period': '1'
    }
    response = requests.get('https://www.fleaflicker.com/api/FetchRoster',
                            params=query)
    response_json = response.json()
    with open('fleaflicker_api_test.json', 'w') as f:
        json.dump(response_json, f, indent=4)

    # Populate the table of all scoring dates
    scoring_dates = []
    for scoring_day in response_json['eligibleLineupPeriods']:
        scoring_period = scoring_day['low']['ordinal']
        epochmilli = int(scoring_day['low']['startEpochMilli'])
        scoring_date = datetime.datetime.fromtimestamp(epochmilli /
                                                       1000.0).date()
        scoring_dates.append([scoring_period, scoring_date])
    scoring_dates = pd.DataFrame(scoring_dates,
                                 columns=['scoring_period', 'scoring_date'])
    scoring_dates.to_sql(name='scoring_dates',
                         con=bbdb,
                         schema='sos_2021',
                         if_exists='replace')

    # Get the list of scoring dates that need to be updated
    today = datetime.date.today()
    missing_scoring_periods = pd.read_sql('''
    SELECT scoring_period::integer, scoring_date FROM sos_2021.scoring_dates
    WHERE (scoring_date <= '{}') AND
        (scoring_period >= (
        SELECT DISTINCT MAX(scoring_period::integer) AS scoring_period
        FROM sos_2021.rosters
        ))
    '''.format(today),
                                          con=bbdb)

    # Loop through all the teams and get the rosters
    teams = pd.read_sql('SELECT * FROM sos_2021.teams', con=bbdb)

    league_id = teams.loc[0, 'league_id']
    team_ids = teams['team_id'].to_list()

    for scoring_period in missing_scoring_periods['scoring_period'].to_list():
        for team_id in team_ids:
            scrape_daily_roster(scoring_period, team_id)
            time.sleep(.5)
Example #26
def rosters(league, upload_to_db=True):
    import sys
    import datetime
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    from general import classes
    from munging import player_names
    from general import postgres

    print(
        '\n--------------------------\nScraping Fleaflicker rosters for league:'
        + league.league_name + '\n')
    assert (league.league_platform == 'fleaflicker')
    league_num = league.league_num

    roster_url = 'https://www.fleaflicker.com/mlb/leagues/' + league_num + '/teams'
    page = requests.get(roster_url)
    soup = BeautifulSoup(page.text, 'html.parser')
    main_div = soup.find('div', id='body-center-main')
    tables = main_div.find_all('table')

    today = datetime.date.today()
    str_today = str(today)

    teams = []

    for t in tables:
        trows = t.find_all('tr')
        for tr in trows:
            if (tr.find("span", {"class": "league-name"
                                 })):  # Found the span with the team name
                team_name = tr.find("span", {"class": "league-name"}).text
                #print('New team: '+team_name)
                teams.append(classes.FantasyTeam(team_name))
                current_team = teams[-1]
            elif tr.find('a', {"class": "player-text"}):
                player_data = tr.find('a', {"class": "player-text"})
                player_name = player_data.text
                player_url = 'https://www.fleaflicker.com' + player_data['href']
                player_ff_id = player_data['href'].split('/')[-1].split(
                    '-')[-1]
                current_team.add_player(player_name, player_ff_id)

    df_export = pd.DataFrame(columns=['Team', 'Player', 'ff_id'])
    for team in teams:
        df_export = pd.concat([df_export, team.to_dataframe()])
    df_export.reset_index(drop=True, inplace=True)

    names = player_names.get_player_names()
    df_export = df_export.merge(right=names[['ff_id', 'fg_id']],
                                how='left',
                                on='ff_id')

    # Go through the Fleaflicker players that don't have matching FG IDs
    missing_fg_id = df_export[df_export['fg_id'].isna()]
    if len(missing_fg_id) > 0:
        print('Missing fg_id for ' + str(len(missing_fg_id.values)) +
              ' player(s):')
        for player in missing_fg_id.values.tolist():
            print('\nMissing info on:')
            print(player)
            ff_match = player_names.find_other_ids_w_ff(player[2])

    file_rosters = '/Users/andrewfelton/Documents/bb/bb-2021/data/rosters/rosters_' + league_num + '_' + str_today + '.csv'
    df_export.to_csv(file_rosters, index=False)
    print('Saved rosters to ' + file_rosters)

    if upload_to_db:
        bbdb = postgres.connect_to_bbdb()
        df_export.to_sql('sos',
                         con=bbdb,
                         schema='rosters',
                         if_exists='replace',
                         index=False)
        print('Uploaded to database')

        player_names.push_player_names_to_gs()
        print('Updated Google Sheets')

    return df_export
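
classes.FantasyTeam is used above but not shown; a minimal sketch consistent with the add_player and to_dataframe calls:

import pandas as pd

class FantasyTeam:
    def __init__(self, team_name):
        self.team_name = team_name
        self.players = []  # (player_name, ff_id) tuples

    def add_player(self, player_name, ff_id):
        self.players.append((player_name, ff_id))

    def to_dataframe(self):
        return pd.DataFrame(
            [[self.team_name, name, ff_id] for name, ff_id in self.players],
            columns=['Team', 'Player', 'ff_id'])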
Example #27
def scrape_fg_projections(type, system, mytype, mysystem):
    import os
    from datetime import date
    import time
    import pandas as pd
    from datetime import datetime
    from general import selenium_utilities
    from general import postgres
    from munging import player_names
    from player_names import put_missing_in_GS
    from scraping import scrape_fg_projections

    driver = selenium_utilities.start_driver(headless=True)
    driver = scrape_fg_projections.fg_login(driver)

    fg_proj_url_base = 'https://www.fangraphs.com/projections.aspx?pos=all'
    fg_proj_url_type = 'stats=' + type
    fg_proj_url_system = 'type=' + system
    fg_proj_url = fg_proj_url_base + '&' + fg_proj_url_type + '&' + fg_proj_url_system
    driver.get(fg_proj_url)
    time.sleep(2)
    #print('Arrived at '+driver.current_url)

    btn_dl_projections = driver.find_element_by_id('ProjectionBoard1_cmdCSV')
    btn_dl_projections.click()
    time.sleep(3)

    fg_account_name = driver.find_element_by_id('linkAccount').text
    print('Account name is: ' + fg_account_name)

    basepath = "/Users/andrewfelton/Documents/bb/bb-2021"
    dl_file = "/Users/andrewfelton/Downloads/docker/FanGraphs\ Leaderboard.csv"

    today = date.today().strftime("%Y%m%d")
    new_file = basepath + "/data/fangraphs/" + mysystem + "_" + mytype + "_" + today + ".csv"
    stream_command = os.popen('mv ' + dl_file + ' ' + new_file)
    mv_file = stream_command.read()

    # create the soft link
    ln_file = basepath + "/data/fangraphs/" + mysystem + "_" + mytype + ".csv"
    command_ln = os.popen('ln -sf ' + new_file + ' ' + ln_file)

    driver.close()
    #selenium_utilities.stop_selenium('bbsel')
    print("Finished scraping " + ln_file)

    proj = pd.read_csv(ln_file)
    proj.insert(0, 'asof_date', date.today().strftime('%Y-%m-%d'))

    # Check to confirm that all the fg_id are in the names table
    # To avoid pandas issues take it out of the dataframe and then put it back in
    fg_ids = proj[['playerid']].astype(str).values
    #put_missing_in_GS(id_list=pd.DataFrame(fg_ids, columns=['fg_id']), type='fg_id')

    tablename = mysystem + "_" + mytype + "_raw"
    bbdb = postgres.connect_to_bbdb()

    query_tables = "SELECT * FROM pg_catalog.pg_tables WHERE schemaname='proj';"
    tables_list_result = bbdb.execute(query_tables)
    tables_list = []
    for table in tables_list_result:
        tables_list.append(table[1])

    if (tablename in tables_list):
        command = 'TRUNCATE TABLE proj.' + tablename
        bbdb.execute(command)
    proj.to_sql(tablename,
                bbdb,
                schema='proj',
                if_exists='append',
                index=False)
Example #28
def scrape_fg_leaderboard(fg_leaderboard_url,
                          scrapedate,
                          folder,
                          filename,
                          schema,
                          table,
                          driver=None):
    import os
    from datetime import date
    import time
    import pandas as pd
    import datetime
    from selenium.webdriver.common.action_chains import ActionChains
    import yaml

    from scraping import scrape_fg_projections
    from general import selenium_utilities
    from general import postgres
    from munging import player_names

    bbdb = postgres.connect_to_bbdb()

    driver_keepalive = True
    if driver == None:
        driver_keepalive = False
        driver = selenium_utilities.start_driver(headless=False)
        driver = scrape_fg_projections.fg_login(driver)

    driver.get(fg_leaderboard_url)
    time.sleep(1)
    print('Arrived at ' + driver.current_url)

    btn_dl_projections = driver.find_element_by_id('LeaderBoard1_cmdCSV')

    actions = ActionChains(driver)
    actions.move_to_element(btn_dl_projections).perform()
    driver.execute_script("window.scrollBy(0, 200);")

    btn_dl_projections.click()
    time.sleep(3)

    if not driver_keepalive:
        driver.close()
        driver.quit()

    basepath = "/Users/andrewfelton/Documents/bb/bb-2021"
    dl_file = "/Users/andrewfelton/Downloads/docker/FanGraphs\ Leaderboard.csv"

    new_file = "{basepath}/data/{folder}/{filename}_{scrapedate}.csv".format(
        basepath=basepath,
        folder=folder,
        filename=filename,
        scrapedate=scrapedate)
    stream_command = 'mv {dl_file} {new_file}'.format(dl_file=dl_file,
                                                      new_file=new_file)
    mv_file_exec = os.popen(stream_command)
    print(mv_file_exec.read())
    print("Finished scraping " + new_file)

    # TRANSFORM
    # Read the CSV file, convert to dataframe, remap the column headers
    stream = open(
        '/Users/andrewfelton/Documents/bb/bb-2021/python/scraping/field_name_mapping.yml',
        'r')
    mapper = yaml.load(stream, yaml.CLoader)
    fgfile = pd.read_csv(new_file)
    fgfile.insert(0, 'asof_date', scrapedate)
    for col in fgfile.columns:
        fgfile.rename(columns={col: col.lower()}, inplace=True)
    fgfile.rename(columns=mapper['fg'], inplace=True)
    fgfile[['fg_id']] = fgfile[['fg_id']].astype(str)

    # Check if the table already exists and if so, clear out the information from the scrapedate
    query_tables = "SELECT * FROM pg_catalog.pg_tables WHERE schemaname='tracking';"
    tables_list = [t for t in bbdb.execute(query_tables)]
    if (table in tables_list):
        if schema == 'hist':
            command = "DELETE FROM {schema}.{table} WHERE asof_date='{scrapedate}';".format(
                schema=schema, table=table, scrapedate=scrapedate)
        elif schema == 'tracking':
            command = 'TRUNCATE TABLE tracking.{table}'.format(table=table)
        bbdb.execute(command)

    # Load to the database
    fgfile.to_sql(name=table, con=bbdb, schema=schema, if_exists='append')
    return fgfile
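
A hypothetical call, pulling one leaderboard into the tracking schema (the URL, folder, and table names are assumptions):

from datetime import date

fgfile = scrape_fg_leaderboard(
    fg_leaderboard_url='https://www.fangraphs.com/leaders.aspx?pos=all&stats=pit',
    scrapedate=str(date.today()),
    folder='fangraphs',
    filename='pitching_leaders',
    schema='tracking',
    table='pitching_leaders')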
Example #29
def scrape_razz(mytype, url, logged_in_driver=False, merge_ownership=True):
    from bs4 import BeautifulSoup
    import pandas as pd
    from datetime import datetime
    from datetime import date
    import os
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from general import selenium_utilities
    from general import postgres
    from munging import player_names
    from munging import rosters

    print('Going to scrape '+mytype+' from '+url)

    if logged_in_driver:
        # Assumes the caller passes an already logged-in Selenium driver
        driver = logged_in_driver
    else:
        driver = selenium_utilities.start_driver()
        waiter = WebDriverWait(driver, 10)

        # Get the home page
        driver.get("https://razzball.com/")
        #expected condition
        waiter.until(EC.presence_of_element_located((By.ID, 'sitemenu')))
        #JavaScript Executor to stop page load
        driver.execute_script("window.stop();")

        # Check if already logged in
        try:
            user_info = driver.find_element_by_id('wp-admin-bar-user-info')
        except NoSuchElementException:
            # If not logged in, then log in
            driver.get("https://razzball.com/wp-login.php?redirect_to=https%3A%2F%2Frazzball.com")
            input_login = driver.find_element_by_id('user_login')
            input_login.send_keys('*****@*****.**')
            input_pw = driver.find_element_by_id('user_pass')
            input_pw.send_keys('36Pm4jKml7')
            input_submit = driver.find_element_by_id('wp-submit')
            input_submit.click()

    # Go to the projections page
    driver.get(url)
    print("Arrived at "+url)

    # Copy the csv window into BS
    soup = BeautifulSoup(driver.page_source, 'lxml')
    table = soup.find('table', id='neorazzstatstable')
    # Close it down
    driver.close()

    streamers = []

    colnames = []
    # Get the list of column names
    ths = table.findAll('th')
    for th in ths:
        colname = th.text.lower()
        colname = colname.replace('#', 'rank').replace('$','value').replace('!','').replace('%','pct_')
        colnames.append(colname)
    # Insert Razz ID before Name
    colnames.insert(colnames.index('name'), 'razz_id')

    # Loop through all the rows and append them to the list
    trows = table.findAll('tr')
    for trow in trows:
        streamer = []
        tds = trow.findAll('td')
        append = True

        if [] == list(set(trow['class']) &
                      set(['class=sorter-head', 'tablesorter-headerRow', 'tablesorter-ignoreRow'])):
            for loc, td in enumerate(tds):
                if (loc+1)==colnames.index('name'):
                    player_url = td.find('a')['href']
                    player_id = player_url.split('/')[4]
                    player_id = '660271' if (str(player_id) == '6602710') else player_id  # Manual correction for Ohtani
                    streamer.append(player_id)  # Razz ID
                    player_name = td.find('a').text
                    streamer.append(player_name)  # player name
                elif ('date' in colnames) and (loc+1)==colnames.index('date'):
                    date_str =td.text + '/2021'
                    streamdate = datetime.strptime(date_str, '%m/%d/%Y')
                    streamer.append(streamdate) # Date
                else:
                    try:
                        value = float(td.text)
                    except ValueError:
                        value = td.text
                    streamer.append(value)

            # Sometimes there are entries with missing values -- do not include those in the dataframe
            for var in ['pa', 'ip']:
                if var in colnames and (
                        str(streamer[colnames.index(var)])=='' or
                        str(streamer[colnames.index(var)])=='None'
                ):
                    append = False
            if streamer[colnames.index('razz_id')]==1.0:
                append = False
            if 'value' in colnames:
                if str(streamer[colnames.index('value')])=='':
                    append = False
            if append:
                streamers.append(streamer)
    df_streamers = pd.DataFrame(streamers, columns=colnames)
    df_streamers.drop(df_streamers[df_streamers['razz_id']==1.0].index, inplace=True)

    names = player_names.get_player_names()
    df_streamers = df_streamers.merge(right=names[['mlb_id', 'fg_id']], how='left', left_on='razz_id', right_on='mlb_id')
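    # Fall back to the razz_id whenever the crosswalk produced no fg_id match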
    df_streamers['fg_id'] = df_streamers.apply(lambda row: row['fg_id'] if str(row['fg_id'])!='nan' else row['razz_id'], axis=1)

    if merge_ownership:
        ff_rosters = rosters.get_ff_ownership().rename(columns={"Team": "SoS_Team"})
        legacy_rosters = rosters.get_legacy_ownership().rename(columns={"Team": "Legacy_Team"})
        df_streamers = df_streamers.merge(
            right=ff_rosters[['SoS_Team', 'fg_id']], how='left', on='fg_id').merge(
                right=legacy_rosters[['Legacy_Team', 'fg_id']], how='left', on='fg_id')

    # Save on computer as .csv file
    mysystem = 'razz'
    today = date.today().strftime("%Y%m%d")
    basename = "/Users/andrewfelton/Documents/bb/bb-2021/data/" + mysystem + '/' + mysystem + "_" + mytype
    new_file = basename + "_" + today + ".csv"
    df_streamers.to_csv(new_file)

    # Create/refresh the soft link (ln -sf is silent on success)
    ln_file = basename + ".csv"
    os.popen('ln -sf ' + new_file + ' ' + ln_file).read()
    print('Linked ' + ln_file + ' -> ' + new_file)

    # Upload to the database
    tablename = mytype
    bbdb = postgres.connect_to_bbdb()

    query_tables = "SELECT * FROM pg_catalog.pg_tables WHERE schemaname='proj';"
    tables_list_result = bbdb.execute(query_tables)
    tables_list = []
    for table_row in tables_list_result:
        tables_list.append(table_row[1])  # pg_tables column 1 is the tablename

    if tablename in tables_list:
        command = 'TRUNCATE TABLE proj."' + tablename + '"'
        bbdb.execute(command)
    df_streamers.to_sql(tablename, bbdb, schema='proj', if_exists='append', index=False)

    return df_streamers
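
A hedged usage sketch for scrape_razz; the type label and URL below are illustrative placeholders (the target page must render the neorazzstatstable table), and mytype doubles as the destination table name in the proj schema:

# Illustrative call only -- the type label and URL are placeholders.
df = scrape_razz(
    mytype='streamonator',                     # also used as the proj.<table> name
    url='https://razzball.com/streamonator/',  # page must contain #neorazzstatstable
    merge_ownership=True,                      # join SoS/Legacy rosters on fg_id
)
print(df[['razz_id', 'name', 'fg_id']].head())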
Example #30
def scrape_ff_player_pool():
    # Loop through the FleaFlicker (FF) player pages and save each player's
    # name, id, url, team, and eligibility to the database
    from bs4 import BeautifulSoup
    import pandas as pd
    import requests
    import time
    import unidecode

    import sys
    sys.path.append('python/munging')
    import player_names
    sys.path.append('python/general')
    import postgres

    # EXTRACT
    pitcher_base_url = 'https://www.fleaflicker.com/mlb/leagues/23172/players?season=2021&statType=1&sortMode=1&position=1536&isFreeAgent=false&tableSortDirection=DESC&tableSortName=pv7&tableOffset='
    hitter_base_url = 'https://www.fleaflicker.com/mlb/leagues/23172/players?season=2021&statType=1&sortMode=1&position=511&isFreeAgent=false&tableSortDirection=DESC&tableSortName=pv7&tableOffset='
    rp_base_url = 'https://www.fleaflicker.com/mlb/leagues/23172/players?season=2021&statType=1&sortMode=1&position=1536&isFreeAgent=false&tableSortName=st25&tableSortDirection=DESC&tableOffset='

    players = []
    for baseurl in [hitter_base_url, pitcher_base_url, rp_base_url]:
        count_top = 601
        if baseurl in [rp_base_url]:
            count_top = 201

        for i in range(0, count_top, 20):
            url = baseurl + str(i)
            page = requests.get(url)
            print('Got ' + url)
            time.sleep(1)
            soup = BeautifulSoup(page.text, 'html.parser')
            table = soup.find('div', {'id': 'body-center-main'}).find('table')

            count = 0
            trow = table.find('thead').next_sibling
            while trow is not None and count < 20:
                player_data = trow.find('div', {'class': 'player'})
                player_name = unidecode.unidecode(
                    player_data.find('a', {
                        'class': 'player-text'
                    }).text)
                player_id = player_data.find(
                    'a', {'class': 'player-text'
                          })['href'].split('/')[-1].split('-')[-1]
                player_url = 'https://www.fleaflicker.com' + player_data.find(
                    'a')['href']
                player_elig = player_data.find('span', {
                    'class': 'position'
                }).text
                player_team = player_data.find('span', {
                    'class': 'player-team'
                }).text
                players.append([
                    player_id, player_name, player_url, player_team,
                    player_elig
                ])
                trow = trow.next_sibling
                count = count + 1

    df_players = pd.DataFrame(
        players, columns=['ff_id', 'ff_name', 'ff_url', 'ff_team', 'ff_elig'])
    df_players.drop_duplicates(subset=['ff_id'],
                               inplace=True,
                               ignore_index=True)

    # TRANSFORM
    def combine_eligibilities(row):
        ff_elig_list = row['ff_elig'].split('/')
        eligibilities = []

        # Infielders
        for pos in ['C', '1B', '2B', 'SS', '3B']:
            if pos in ff_elig_list:
                eligibilities.append(pos)
        if '2B' in eligibilities or 'SS' in eligibilities:
            eligibilities.append('MI')
        if '1B' in eligibilities or '3B' in eligibilities:
            eligibilities.append('CI')
        if 'MI' in eligibilities or 'CI' in eligibilities:
            eligibilities.append('IF')

        # Outfielders
        for pos in ['OF', 'RF', 'LF', 'CF']:
            if pos in ff_elig_list and 'OF' not in eligibilities:
                eligibilities.append('OF')

        # Pitchers - keep the FF eligibility as-is. (Check the raw list:
        # pitcher positions are never appended to eligibilities above, so
        # testing eligibilities here would always be False.)
        if 'SP' in ff_elig_list or 'RP' in ff_elig_list or 'P' in ff_elig_list:
            eligibilities = ff_elig_list

        # Concatenate into a string and return; default to utility-only
        elig = ' '.join(eligibilities).strip()
        if elig == '':
            elig = 'UT'
        return elig

    df_players['elig'] = df_players.apply(
        lambda row: combine_eligibilities(row), axis=1)

    names = player_names.get_player_names()
    df_players = df_players.merge(right=names[['ff_id', 'fg_id']],
                                  how='left',
                                  on='ff_id')

    # LOAD
    bbdb = postgres.connect_to_bbdb()
    df_players.to_sql('player_pool_ff',
                      con=bbdb,
                      schema='reference',
                      if_exists='replace',
                      chunksize=1000,
                      method='multi',
                      index=False)
    print('Uploaded FleaFlicker player pool')

    return df_players
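
A minimal driver sketch, assuming database credentials and the name crosswalk behind player_names.get_player_names() are already in place:

# Hypothetical driver; assumes DB access and the reference tables exist.
if __name__ == '__main__':
    pool = scrape_ff_player_pool()
    # Spot-check the derived eligibility: a 2B/SS player should also carry MI and IF.
    print(pool[['ff_name', 'ff_elig', 'elig']].head(10))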