def statistics_war(year):
    """Compute defensive WAR for every non-pitcher fielding line in ``year``.

    Reads the season's rows from ``statistics_fielding``, turns each
    fielder's defensive run values and positional adjustment into
    innings-weighted values, and bulk-writes one row per fielding line to
    ``processed_compWAR_defensive``.

    Args:
        year: Season to process. Interpolated into the SELECT and forwarded
            to the ``db``/``helper`` lookups.
    """
    player_q = """SELECT
    player_name,
    team_id,
    pos,
    inn
    FROM statistics_fielding
    WHERE year = %s;
    """
    player_data = db.query(player_q % (year))

    entries = []
    for player_name, team_id, position, inn in player_data:
        # Pitchers never produce a row, so skip them before spending a team
        # lookup on data that would be thrown away.
        if position.lower() == 'p':
            continue

        team_abb = db.lookupValues("teams", ("team_id", "year",), (team_id, year), val="team_abb", operators=("=", "="))[0]

        # Single quotes are doubled before the name is handed to the helper
        # (presumably because the helper embeds it in a SQL string -- confirm).
        search_name = player_name.replace("'", "''")
        rn_val, err_val, arm_val, pb_val = helper.get_def_values(search_name, position, year)

        innings = float(inn)

        # 1450 innings is a full season
        defense = innings*(rn_val + err_val + arm_val + pb_val)/1450
        adj = float(helper.get_pos_adj(position.upper()))
        position_adj = adj*(innings/1450)

        entries.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'position': position,
            # Not available from the fielding table; stored as NULL.
            'age': None,
            'pa': None,
            'inn': inn,
            'defense': defense,
            'position_adj': position_adj,
            # NOTE(review): the /10 looks like a runs-per-win conversion -- confirm.
            'dWAR': (defense + position_adj)/10,
        })

    table = 'processed_compWAR_defensive'
    if entries:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()
def calculate_war():
    """Derive park-adjusted ZiPS hitting metrics and store them.

    Reads every row of ``zips_offense``, computes plate appearances, BABIP
    and the park factor, asks ``helper.get_zips_offensive_metrics`` for the
    rate metrics, and writes the results to ``zips_WAR_hitters_comp`` in
    batches of 1000 rows (committing after each batch).

    Park factors and the offensive metrics are looked up with ``year - 1``,
    exactly as the original code did.
    """
    player_q = """SELECT
    year,
    player_name,
    team_abb,
    ab, h, 2b, 3b, hr, bb, so, hbp, ibb, sh, sf, sb, cs
    FROM zips_offense
    """
    player_data = db.query(player_q)

    entries = []
    for row in player_data:
        year, player_name, team_abb, ab, h, _2, _3, hr, bb, so, hbp, ibb, sh, sf, sb, cs = row

        # bb and ibb are added separately here, so ``bb`` is evidently
        # treated as unintentional walks only.
        pa = ab + bb + hbp + ibb + sh + sf
        bb2 = bb + ibb
        _1 = h - _2 - _3 - hr

        team_abb = team_abb.upper()
        pf = float(helper.get_park_factors(team_abb, year - 1)) / float(100)

        # A hitter with no balls in play would otherwise raise
        # ZeroDivisionError and abort the whole run; record 0.0 instead.
        balls_in_play = ab + sh + sf - so - hr
        if balls_in_play == 0:
            babip = 0.0
        else:
            babip = float(h - hr) / float(balls_in_play)

        # Only park_wOBA, OPS_plus and wRC_plus are persisted; the remaining
        # values are unpacked to keep the 9-value contract explicit.
        _ops, _wOBA, park_wOBA, OPS_plus, _wrc, _wrc27, wRC_plus, _raa, _oWAR = helper.get_zips_offensive_metrics(
            year - 1, pf, pa, ab, bb2, hbp, _1, _2, _3, hr, sb, cs)

        entries.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'pf': pf,
            'pa': pa,
            'babip': babip,
            'OPS_plus': OPS_plus,
            'park_wOBA': park_wOBA,
            'wRC_plus': wRC_plus,
        })

    table = 'zips_WAR_hitters_comp'
    for i in range(0, len(entries), 1000):
        db.insertRowDict(entries[i:i + 1000],
                         table,
                         insertMany=True,
                         replace=True,
                         rid=0,
                         debug=1)
        db.conn.commit()
Exemple #3
0
def initiate(yr, _type, player_mapper):
    path = '/Users/connordog/Dropbox/Desktop_Files/Work_Things/CodeBase/Python_Scripts/Python_Projects/NSBL/ad_hoc/historical_csv_files/'

    csv_file = path + '%s_zips_%s_splits.csv' % (yr, _type)

    print yr, _type

    entries = []
    with codecs.open(csv_file, 'rb', encoding='utf-8', errors='ignore') as f:
        mycsv = csv.reader(f)
        i = 0

        for row in mycsv:
            if i == 0:
                i += 1
                continue
            else:
                i += 1
                year, player_name, vs_hand, ab, h, _2b, _3b, hr, rbi, bb, so, hbp, ibb, sh, sf = row
                if player_name in player_mapper:
                    player_name = player_mapper.get(player_name)
                else:
                    helper.input_name(player_name)
                entry = {
                    "year": yr,
                    "player_name": player_name,
                    "vs_hand": vs_hand,
                    "ab": ab,
                    "h": h,
                    "2b": _2b,
                    "3b": _3b,
                    "hr": hr,
                    "rbi": rbi,
                    "bb": bb,
                    "so": so,
                    "hbp": hbp,
                    "ibb": ibb,
                    "sh": sh,
                    "sf": sf
                }
                entries.append(entry)

    table = 'zips_%s_splits' % (_type)
    if entries != []:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()
Exemple #4
0
def scrape_cur_standings():
    table_url = 'http://thensbl.com/orgstand.htm'

    tables = get_tables(table_url)

    standings_changed = False
    for table in tables:
        titles = table.find_all('tr', class_=re.compile('dmrptsecttitle'))

        for title in titles:
            element = []
            tit = title.get_text()
            if tit == 'Divisional':
                sql_table = 'team_standings'

                rows = table.find_all('tr', class_=re.compile('dmrptbody'))

                for row in rows:
                    element = []
                    for data in row:
                        if data.get_text() == '&nbsp':
                            element.append(None)
                        else:
                            #strip takes white space away from the front and end of a text string
                            element.append(data.get_text().strip())

                    year = element[0]
                    team_location_name = element[1]
                    wins = element[2]
                    losses = element[3]

                    if team_location_name is not None:
                        full_name = helper.get_team_name(
                            team_location_name, year)

                        qry = """SELECT ts.year
                        , ts.team_name
                        , MAX(ts.games_played) AS gp
                        FROM team_standings ts
                        WHERE 1
                            AND ts.team_name = '%s'
                            AND ts.year = %s
                        GROUP BY ts.team_name, ts.year"""

                        prev_gp = db.query(qry % (full_name, year))
                        if prev_gp == ():
                            print "\n\nNEW SEASON!!!!!\n\n"
                            prev_gp == 0
                        else:
                            prev_gp = prev_gp[0][2]

                        if int(wins) + int(losses) != prev_gp:
                            standings_changed = True

                        # print full_name, int(wins)+int(losses), prev_gp, standings_changed

    return standings_changed
Exemple #5
0
def input_data(ratings, sql_table, cats, year):
    print '\t' + sql_table
    entries = []
    for player in ratings:
        entry = {}
        entry['year'] = year
        for cat, val in zip(cats, player):
            # any category we aren't interested in recording, we mark as foo
            if cat != 'foo':
                # entry[cat] = val #####
                if cat == 'player_name' and val is not None:
                    entry[cat] = val.replace('*', '').replace('#', '')
                else:
                    entry[cat] = val

        if (entry.get("player_name") not in ('Total', None, '', 'Other')
                and entry.get("team_abb") not in ('Total', None, '', 'Other')):
            entries.append(entry)
        elif entry.get("team_name") not in ('Total', None, '', 'Other'):

            full_name = helper.get_team_name(entry.get("team_name"), year)
            entry['team_name'] = full_name
            if sql_table == 'team_standings':
                entry['games_played'] = int(entry.get('w')) + int(
                    entry.get('l'))
            entries.append(entry)

        if 'player_name' in entry:
            helper.input_name(entry.get('player_name'))

    # used for debugging
    # if entries != []:
    #     for entry in entries[0:30]:
    #         print '\t\t',
    #         print entry
    #     raw_input("")

    if entries != []:
        db.insertRowDict(entries,
                         sql_table,
                         insertMany=True,
                         rid=0,
                         replace=True)
    db.conn.commit()
def process_prospect_list(year, list_type, list_key):

    list_url = base_url + "%s%s" % (year, list_key)
    print "\n", year, list_type, list_url

    json = getter.get_url_data(list_url, "json")

    entries = []
    for plr in json:
        entry = {'prospect_type': list_type}
        for ky, val in plr.items():
            if type(val) in (str, unicode):
                val2 = "".join([i if ord(i) < 128 else "" for i in val])
                if val != val2 and 'name' in ky.lower():
                    print '\n\n\n\nUNICODE NAME!!!! - \n\t', val
                    print '\t', val2, '\n\n\n\n'
                if 'playerid' in ky.lower():
                    val = val2.replace(' ', '')
                else:
                    val = val2
            entry[ky.lower().replace("%", "%%")] = val

        if ('playername' not in entry or entry['playername'] == ''):
            continue

        if 'playerid' not in entry or entry['playerid'] == '':
            entry['playerid'] = str(
                entry['playername'].replace(' ', '').replace('*', '').replace(
                    ",", "")) + '_' + str(entry['type'].replace(' ', ''))
        if 'team' not in entry or entry['team'] == '':
            entry['team'] = '--empty--'

        print '\t', year, list_key, entry['playername']
        helper2.input_name(entry.get('playername'))
        db.insertRowDict(entry,
                         'fg_raw',
                         insertMany=False,
                         replace=True,
                         rid=0,
                         debug=1)
        db.conn.commit()
    sleep(sleep_time)
def process_division(year):
    print 'division'
    for _type in ('roster', 'projected'):
        # print '\t', _type
        for div in ('AL East', 'AL Central', 'AL West', 'NL East',
                    'NL Central', 'NL West'):

            qry = """SELECT 
            team_abb, team_name, 
            mean_W/162.0, var, year, games_played
            FROM __playoff_probabilities
            JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 USING (team_abb, year, games_played)
            WHERE strength_type = '%s'
            AND division = '%s'
            AND year = %s;"""
            query = qry % (_type, div, year)

            # raw_input(query)

            res = db.query(query)

            div_dict = {}
            for row in res:
                team_abb, team_name, strength_pct, var, year, games_played = row
                # print '\t\t', team_name

                if games_played > 162:
                    strength_pct = float(
                        (float(strength_pct) * 162.0) / float(games_played))
                else:
                    strength_pct = float(strength_pct)

                division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                    team_name, year)

                win_division_prob = np.prod(
                    get_probabilities(team_name,
                                      div_teams, strength_pct, games_played,
                                      float(var), _type, year)[0])

                div_dict[team_name] = [
                    win_division_prob, 1.0, False, year, games_played
                ]

            col_name = 'win_division'
            adjust_probabilities(div_dict, col_name, 1.0, _type)
def initiate(yr, _type, player_mapper):
    path = '/Users/connordog/Dropbox/Desktop_Files/Work_Things/CodeBase/Python_Scripts/Python_Projects/NSBL/ad_hoc/historical_csv_files/'

    csv_file_ext = '%s_zips_%s.csv'  % (yr, _type)
    csv_file = path+csv_file_ext

    print yr, _type, csv_file_ext

    entries = []
    with codecs.open(csv_file, 'rb', encoding='utf-8', errors='ignore') as f:
        mycsv = csv.reader(f)
        i = 0

        for row in mycsv:
            if i == 0:
                i += 1
                continue
            else:
                i += 1
                if _type == 'offense':
                    year, player_name, team_abb, age, bats, g, ab, r, h, _2b, _3b, hr, rbi , bb, so , hbp, sb, cs, sh, sf, ibb, war = row 
                    if player_name in player_mapper:
                        player_name = player_mapper.get(player_name)
                    else:
                        helper.input_name(player_name)
                    # print player_name
                    entry = {"year":yr, "player_name":player_name, "team_abb":team_abb, "age":age, "bats":bats, "g":g, "ab":ab, "r":r, "h":h, "2b":_2b, "3b":_3b, "hr":hr, "rbi":rbi, "bb":bb, "so":so, "hbp":hbp, "sb":sb, "cs":cs, "sh":sh, "sf":sf, "ibb":ibb, "zWAR":war}
                    entries.append(entry)

                elif _type == 'pitching':
                    year, player_name, team_abb, age, throws, w, l, era, g, gs, ip, h, r, er, hr, bb, so, war = row 
                    if player_name in player_mapper:
                        player_name = player_mapper.get(player_name)
                    else:
                        helper.input_name(player_name)
                    entry = {"year":yr, "player_name":player_name, "team_abb":team_abb, "age":age, "throws":throws, "w":w, "l":l, "era":era, "g":g, "gs":gs, "ip":ip, "h":h, "r":r, "er":er, "hr":hr, "bb":bb, "so":so, "zWAR":war}
                    entries.append(entry)

                elif _type == 'defense':
                    year, player_name, c_rn, c_er, _1b_rn, _1b_er, _2b_rn, _2b_er, _3b_rn, _3b_er, ss_rn, ss_er, lf_rn, lf_er, cf_rn, cf_er, rf_rn, rf_er, c_arm, of_arm, pb, FOO = row 
                    if player_name in player_mapper:
                        player_name = player_mapper.get(player_name)
                    else:
                        helper.input_name(player_name)
                    entry = {"year":yr, "player_name":player_name, "c_range":c_rn, "c_error":c_er, "1b_range":_1b_rn, "1b_error":_1b_er, "2b_range":_2b_rn, "2b_error":_2b_er, "3b_range":_3b_rn, "3b_error":_3b_er, "ss_range":ss_rn, "ss_error":ss_er, "lf_range":lf_rn, "lf_error":lf_er, "cf_range":cf_rn, "cf_error":cf_er, "rf_range":rf_rn, "rf_error":rf_er, "c_arm":c_arm, "of_arm":of_arm, "c_pb":pb}
                    entries.append(entry)
                # print i, _type, player_name

    table = 'zips_%s' % (_type)
    if entries != []: 
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()
def process_top_seed(year):
    print "top seed"

    for _type in ('roster', 'projected'):
        # print '\t', _type
        for conf in ('AL', 'NL'):
            team_qry = """SELECT 
            team_abb, team_name, win_division,
            mean_W/162.0, var, year, games_played
            FROM __playoff_probabilities
            JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 USING (team_abb, year, games_played)
            WHERE strength_type = '%s' 
            AND LEFT(division,2) = '%s'
            AND year = %s;"""
            team_query = team_qry % (_type, conf, year)
            # raw_input(team_query)
            team_res = db.query(team_query)

            top_dict = {}
            for team_row in team_res:
                team_abb, team_name, max_prob, strength_pct, var, year, games_played = team_row
                max_prob = float(max_prob)
                # print '\t\t', team_name

                if games_played > 162:
                    strength_pct = float(
                        (float(strength_pct) * 162.0) / float(games_played))
                else:
                    strength_pct = float(strength_pct)

                division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                    team_name, year)

                top_seed_prob = np.prod(
                    get_probabilities(team_name,
                                      conf_teams, strength_pct, games_played,
                                      float(var), _type, year)[0])

                top_dict[team_name] = [
                    top_seed_prob, max_prob, False, year, games_played
                ]

            col_name = 'top_seed'
            adjust_probabilities(top_dict, col_name, 1.0, _type)
Exemple #10
0
def process_urls(urls, year):
    print year
    for teamcnt, team_pair in enumerate(urls):
        for tm, url in team_pair.items():
            print '\t', str(teamcnt + 1), tm, '-', url

            tm_list = []
            tm_query = db.query(
                "SELECT DISTINCT team_abb FROM zips_fangraphs_batters_counting WHERE year = %s"
                % (year))
            for t in tm_query:
                tm_list.append(t[0])
            if tm in tm_list:
                continue

            sleep(sleep_time)
            team_data = requests.get(url)
            team_soup = BeautifulSoup(team_data.content, "lxml")

            postmeta_date = team_soup.find(
                class_="postmeta").findAll("div")[-1].getText()
            post_date = parse(postmeta_date).strftime("%Y-%m-%d")

            tables = team_soup.findAll(
                "table", {
                    "class": [
                        "sortable", "sortable table-equal-width",
                        "table-equal-width"
                    ]
                })
            print len(tables)

            if len(tables) == 0:
                tables = team_soup.findAll("table")[11:]
                print len(tables)

            j = 0
            for table in tables:
                # raw_input(table)
                headers = table.find("tr")
                # raw_input(headers)

                headers = headers.findAll()

                cats = []
                for h in headers:
                    cat = h.getText().replace('/', '_').replace(
                        '+', '_Plus').replace('-', '_Minus').replace(
                            'No. 1 Comp', 'Top_Comp').replace('%', '_Pct')
                    cats.append(cat)

                if len(cats) < 10:
                    continue
                else:
                    j = j + 1

                # raw_input(i)
                entries = []
                if j == 1:
                    db_table = "zips_fangraphs_batters_counting"
                elif j == 2:
                    db_table = "zips_fangraphs_batters_rate"
                elif j == 3:
                    db_table = "zips_fangraphs_pitchers_counting"
                elif j == 4:
                    db_table = "zips_fangraphs_pitchers_rate"

                print '\t\t', db_table

                # print cats
                rows = table.findAll("tr")

                for r in rows:
                    # print r
                    # print r.get("class")
                    # raw_input("")
                    if r.get("class") is None:
                        entry = {}
                        entry["year"] = year
                        entry["team_abb"] = tm
                        entry["post_date"] = post_date
                        atts = r.findAll("td")
                        # raw_input(atts)
                        if atts != []:
                            for k, att in enumerate(atts):
                                fld = att.getText()
                                fld = "".join(
                                    [i if ord(i) < 128 else "" for i in fld])
                                entry[cats[k]] = fld

                            # print '\t\t\t', entry
                            if entry["Player"] != "":
                                helper.input_name(entry.get('Player'))
                                entries.append(entry)

                if entries != []:
                    for i in range(0, len(entries), 1000):
                        db.insertRowDict(entries[i:i + 1000],
                                         db_table,
                                         insertMany=True,
                                         replace=True,
                                         rid=0,
                                         debug=1)
                        db.conn.commit()
Exemple #11
0
def offensive_war(year):
    """Compute offensive value metrics for every batter line in ``year``.

    Joins the three ``register_batting_*`` tables for the season, obtains
    the park factor and the metrics returned by
    ``helper.get_offensive_metrics`` for each line, and bulk-writes the
    results to ``processed_compWAR_offensive``.

    Args:
        year: Season to process. Interpolated into the SELECT and forwarded
            to the ``helper`` lookups.
    """
    player_q = """SELECT
    player_name,
    team_abb,
    position,
    age,
    pa,
    ab,
    (h-2b-3b-hr) as 1b, 2b, 3b, hr, r, rbi, bb, k, hbp, sb, cs, ops, babip
    FROM register_batting_primary
    JOIN register_batting_secondary USING (year, player_name, team_abb, position, age)
    JOIN register_batting_analytical USING (year, player_name, team_abb, position, age)
    WHERE year = %s;
    """

    entries = []
    for row in db.query(player_q % (year)):
        (player_name, team_abb, position, age, pa, ab, _1b, _2b, _3b, hr, r,
         rbi, bb, k, hbp, sb, cs, ops, babip) = row

        # The stored team_abb keeps its original case; only the park-factor
        # lookup uses the upper-cased form.
        pf = float(helper.get_park_factors(team_abb.upper(), year)) / float(100)

        metrics = helper.get_offensive_metrics(
            year, pf, pa, ab, bb, hbp, _1b, _2b, _3b, hr, sb, cs)
        # The first returned value is not stored.
        foo, wOBA, park_wOBA, OPS_plus, wrc, wrc27, wRC_plus, raa, oWAR = metrics

        entries.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'position': position,
            'age': age,
            'pa': pa,
            'pf': pf,
            'ops': ops,
            'babip': babip,
            'wOBA': wOBA,
            'park_wOBA': park_wOBA,
            'OPS_plus': OPS_plus,
            'wrc': wrc,
            'wRC_27': wrc27,
            'wRC_plus': wRC_plus,
            'raa': raa,
            'oWAR': oWAR,
        })

    table = 'processed_compWAR_offensive'
    if entries:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()
Exemple #12
0
def calculate_war():
    player_q = """SELECT
    year,
    player_name,
    team_abb,
    age,
    g, 
    gs,
    era,
    ip,
    h, r, er, bb, so, hr
    FROM zips_pitching
    """
    player_qry = player_q
    player_data = db.query(player_qry)

    entries = []
    for row in player_data:
        entry = {}
        year, player_name, team_abb, age, g, gs, era, ip, h, r, er, bb, k, hr = row
        print year, player_name

        team_abb = team_abb.upper()
        pf = float(helper.get_park_factors(team_abb, year - 1)) / float(100)

        if ip == 0:
            k_9 = 0.0
            if bb > 0:
                bb_9 = 99.0
                k_bb = 99.0
            else:
                bb_9 = 0.0
                k_bb = 0.0
            if hr > 0:
                hr_9 = 99.0
            else:
                hr_9 = 0.0
        else:
            k_9 = (float(k) / float(ip)) * 9
            bb_9 = (float(bb) / float(ip)) * 9
            hr_9 = (float(hr) / float(ip)) * 9
            if bb == 0:
                if k > 0:
                    k_bb = 99.0
                else:
                    k_bb = 0.0
            else:
                k_bb = (float(k) / float(bb))

        fip_const = float(
            helper.get_zips_average_pitchers(year - 1, 'fip_const'))
        FIP = ((((13 * float(hr)) + (3 * float(bb)) -
                 (2 * float(k))) / float(ip)) + fip_const)
        park_FIP, FIP_min, FIP_WAR = helper.get_zips_pitching_metrics(
            FIP, ip, year - 1, pf, g, gs, 'fip')

        ERA = float(era)
        park_ERA, ERA_min, ERA_WAR = helper.get_zips_pitching_metrics(
            ERA, ip, year - 1, pf, g, gs, 'era')

        entry['year'] = year
        entry['player_name'] = player_name
        entry['team_abb'] = team_abb
        entry['age'] = age
        entry['pf'] = pf
        entry['ip'] = ip
        entry['k_9'] = k_9
        entry['bb_9'] = bb_9
        entry['k_bb'] = k_bb
        entry['hr_9'] = hr_9
        entry['FIP'] = FIP
        entry['park_FIP'] = park_FIP
        entry['FIP_minus'] = FIP_min
        entry['FIP_WAR'] = FIP_WAR
        entry['ERA'] = era
        entry['park_ERA'] = park_ERA
        entry['ERA_minus'] = ERA_min
        entry['ERA_WAR'] = ERA_WAR

        entries.append(entry)

    table = 'zips_WAR_pitchers'
    if entries != []:
        for i in range(0, len(entries), 1000):
            db.insertRowDict(entries[i:i + 1000],
                             table,
                             insertMany=True,
                             replace=True,
                             rid=0,
                             debug=1)
            db.conn.commit()
Exemple #13
0
def get_optimal_lineups(year, season_gp):
    """Build and store a team-strength projection row for every team.

    For each team returned by the optimal pitching/lineup join, combines the
    roster's projected value with the team's current record to produce
    roster-based, rest-of-season and full-season win/loss figures plus an
    overall variance, then writes one row per team to ``__team_strength``
    (inserted and committed individually).

    Args:
        year: Season passed to the ``helper`` lookups and to
            ``get_standing_metrics``; stored on each row.
        season_gp: Stored unchanged on each row as ``season_gp``.
    """
    # Per-team values: lineup value/variance is blended 25% vs. left-handers
    # and 75% vs. right-handers and added to the pitching totals.
    optimal_query = """SELECT team_abb, 
    starter_val, bullpen_val, 
    l.lineup_val AS lineup_vsL, r.lineup_val AS lineup_vsR,
    total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val) AS roster_WAR,
    starter_var, bullpen_var,
    l.lineup_var AS vsL_var, r.lineup_var AS vsR_var,
    total_var + 0.25*l.lineup_var + 0.75*r.lineup_var AS roster_var
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL
    ORDER BY team_abb ASC;"""

    # League-wide sum of the same blended roster value.
    total_roster_war_query = """SELECT
    SUM(p.total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val)) AS roster_WAR
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL;"""

    # should be around ~1000
    total_roster_war = db.query(total_roster_war_query)[0][0]

    # should be around 48 (48-114 replacement level?)
    # NOTE(review): 2430 and 30 presumably encode a 30-team, 162-game league
    # (30 * 162 / 2 total wins) -- confirm.
    replacement_team_wins = (2430-float(total_roster_war))/30

    # should be around .300
    rep_team_win_pct = float(replacement_team_wins)/162

    optimal_res = db.query(optimal_query)

    for row in optimal_res:
        entry = {}
        team_abb, starter_val, bullpen_val, lu_vsL, lu_vsR, roster_WAR, starter_var, bullpen_var, vsL_var, vsR_var, roster_var = row

        # Resolve the abbreviation to a mascot name, fetch the team's current
        # standing metrics, then replace team_abb with the helper's value.
        mascot_name = helper.get_mascot_names(team_abb.upper(), year)
        team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR, W, L, py_W, py_L = get_standing_metrics(year, mascot_name)
        team_abb = helper.get_team_abb(team_name, year)
        # mascot_name = helper.get_mascot_names(team_abb.upper(), year-1)
        # team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR, W, L, py_W, py_L = get_standing_metrics(year, mascot_name)
        # team_abb = helper.get_team_abb(team_name, year-1)


        games_played = float(games_played)

        # Roster-based wins = roster value + replacement-level wins over the
        # season length (the actual games played when that exceeds 162).
        if games_played > 162.0:
            roster_W = float(roster_WAR) + rep_team_win_pct*games_played
            roster_pct = roster_W/games_played
            ros_g = 0
        else:
            roster_W = float(roster_WAR) + rep_team_win_pct*162
            roster_pct = roster_W/162.0
            ros_g = 162-games_played

        # If either denominator is zero, both percentages fall back to .500.
        try:
            w_pct = float(W)/float(W+L)
            py_pct = float(py_W)/float(py_W+py_L)
        except ZeroDivisionError:
            w_pct = 0.5
            py_pct = 0.5


        # logistic weights for pythag% and win%
        # rest of season projected win% = (1-2w)*(roster%) + w(pythag%) + w(win%)
        # where w = (0.25) / (1+20e^(-0.035*games_played))
        # (for the first 10 games a linear ramp of 0.0015 per game is used instead)
        if games_played <= 10:
            current_weight = 0.0015*float(games_played)
        else:
            current_weight = 0.25 / (1 + 20*math.exp(-0.035*float(games_played)))

        # Observed and pythagorean percentages are floored at 0.25 in the blend.
        ros_pct = (1-2*current_weight)*roster_pct + (current_weight)*max(py_pct, 0.25) + (current_weight)*max(w_pct, 0.25)

        ros_W = ros_pct*ros_g


        # for the total amount of variance for the team, we first take the total amount of variance from team projections (based on the variance in each individual player's projection)
        total_roster_var = float(roster_var) 
        # then we add a measure of variance based on the difference between true talent record (pythag record) and observed record (see /variance_research/Full Season Pythag Standings std research.png)
        total_roster_var += -0.0055021865*(ros_pct*162) + 3.4706743014
        # Finally we add a value of 5.0 to the STANDARD DEVIATION (not variance). We can express the amount of variance desired to add in the set of equations {std = sqrt(v), std+5.0 = sqrt(v+c)}, and then solving for c (https://tinyurl.com/y8tk64ez)
        # i.e. c = (sqrt(v)+5)^2 - v = 10*sqrt(v) + 25, which is the expression below.
        # NB. the value of 5.0 is a guess (~0.33 win for each starter plus a small amount for bench players and relief pitchers) and hack-y and should be cleaned up, or at least weighted more towards defensive #s over wOBA numbers) wins to the variance due to my uncertain nature (mostly from defense) of my conversion from raw ZiPS to DMB WAR (i.e., I think if my projection says the team is a true talent 90 win team, I think there is +/- 5.0 wins of standard deviation in that projection)
        total_roster_var += 10*math.sqrt(total_roster_var) + 25

        



        # Full-season projection = wins already banked + rest-of-season wins.
        projected_W = W + ros_W
        

        if games_played > 162.0:
            roster_L = games_played - roster_W
            projected_L = games_played - projected_W
            projected_pct = projected_W/games_played
        else:
            roster_L = 162.0 - roster_W
            projected_L = 162.0 - projected_W
            projected_pct = projected_W/162.0

        entry['team_abb'] = team_abb
        entry['team_name'] = team_name
        entry['year'] = year
        entry['season_gp'] = season_gp
        entry['games_played'] = games_played
        entry['starter_val'] = starter_val
        entry['bullpen_val'] = bullpen_val
        entry['vsR_val'] = lu_vsR
        entry['vsL_val'] = lu_vsL
        entry['roster_strength'] = roster_WAR
        entry['starter_var'] = starter_var
        entry['bullpen_var'] = bullpen_var
        entry['vsR_var'] = vsR_var
        entry['vsL_var'] = vsL_var
        entry['roster_var'] = roster_var
        entry['overall_var'] = total_roster_var    
        entry['roster_W'] = roster_W
        entry['roster_L'] = roster_L
        entry['roster_pct'] = roster_pct
        entry['current_W'] = W
        entry['current_L'] = L
        entry['current_pct'] = w_pct
        entry['ros_W'] = ros_W
        entry['ros_L'] = ros_g - ros_W
        entry['ros_pct'] = ros_pct
        entry['projected_W'] = projected_W
        entry['projected_L'] = projected_L
        entry['projected_pct'] = projected_pct

        # raw_input(entry)
        db.insertRowDict(entry, '__team_strength', insertMany=False, replace=True, rid=0,debug=1)
        db.conn.commit()
def parse_prospect(rnk, year, prospect, team):
    prospect_type = (team if team in ("draft", "international") else "professional")
    entry = {}
    def print_prospect_details (prospect):
        def print_dict(k, v, lvl):
            for num in range(1, lvl):
                print "\t",
            if type(v) is dict:
                print k
                for y, z in j.items():
                    print_dict(y, z, lvl+1)
            else:
                print (str(k)[:20] if len(str(k)) > 20 else str(k).ljust(20)), "\t", ("SOME LIST" if type(v) is list else v)

        for a, b in prospect.items():
            print_dict(a, b, 1)

    def process_grades(year, grades_id, grades, player_type, prospect_type):
        grade_entry = {"year":year, "grades_id":grades_id, "prospect_type":prospect_type}
        fv = 0
        for g in grades:
            if g.get("key") is None:
                continue
            if g.get("key").lower().strip() == "overall":
                fv = g.get("value")
            elif g.get("key").lower().strip() not in ("fastball", "change", "curve", "slider", "cutter", "splitter", "control", "hit", "power", "run", "arm", "field", "speed", "throw", "defense"):
                grade_entry["other"] = g.get("value")
            else:
                if g.get("key").lower().strip() == "speed":
                    grade_entry["run"] = g.get("value")
                elif g.get("key").lower().strip() == "throw":
                    grade_entry["arm"] = g.get("value")
                elif g.get("key").lower().strip() == "defense":
                    grade_entry["field"] = g.get("value")
                else:
                    grade_entry[g.get("key").lower().strip()] = g.get("value")

        if "hit" in grade_entry or "field" in grade_entry:
            grades_table = "mlb_grades_hitters"
        elif "control" in grade_entry or "fastball" in grade_entry:
            grades_table = "mlb_grades_pitchers"
        else:
            print "\n\n\n", grades, "\n\n\n"
            return fv
        db.insertRowDict(grade_entry, grades_table, insertMany=False, replace=True, rid=0,debug=1)
        db.conn.commit()
        return fv

    # print_prospect_details(prospect)

    mlb_id = prospect.get("player").get("id")
    fname = prospect.get("player").get("useName")
    lname = prospect.get("player").get("lastName")
    input_name = fname + " " + lname
    helper2.input_name(input_name)
    fname, lname = helper.adjust_mlb_names(mlb_id, fname, lname)

    position = prospect.get("player").get("positionAbbreviation")
    position = helper.adjust_mlb_positions(mlb_id, position)

    entry["year"] = year
    entry["rank"] = rnk
    entry["mlb_id"] = mlb_id
    entry["fname"] = fname
    entry["lname"] = lname
    entry["position"] = position

    try:
        dob = prospect.get("player").get("birthDate")
        byear = dob.split("-")[0]
        bmonth = dob.split("-")[1]
        bday = dob.split("-")[2]
    except IndexError:
        print "\n\nNO BIRTHDAY", fname, lname, mlb_id, "\n\n"

    prospect_id = helper.add_prospect(mlb_id, fname, lname, byear, bmonth, bday, p_type=prospect_type)

    if prospect_id == 0 or prospect_id is None:
        grades_id = mlb_id
    else:
        grades_id = prospect_id

    entry["birth_year"] = byear
    entry["birth_month"] = bmonth
    entry["birth_day"] = bday
    entry["prospect_id"] = prospect_id
    entry["grades_id"] = grades_id

    bats = prospect.get("player").get("batSideCode")
    throws = prospect.get("player").get("pitchHandCode")
    weight = prospect.get("player").get("weight")
    try:
        height = prospect.get("player").get("height").replace("\"","").split("'")
        height = int(height[0])*12+int(height[1])
    except (IndexError, ValueError, AttributeError):
        height = None

    entry["bats"] = bats
    entry["throws"] = throws
    entry["weight"] = weight
    entry["height"] = height

    try:
        team = prospect.get("player").get("currentTeam").get("parentOrgName")
    except (AttributeError):
        team = None
    entry["team"] = team

    commit = prospect.get("prospectSchoolCommitted")
    entry["college_commit"] = commit

    eta = prospect.get("eta")
    entry["eta"] = eta

    hit_fv = None
    pitch_fv = None
    if prospect.get("gradesHitting") is not None and prospect.get("gradesHitting") != []:
        hit_grades = prospect.get("gradesHitting")
        hit_fv = process_grades(year, grades_id, hit_grades, "hit", prospect_type)

    if prospect.get("gradesPitching") is not None and prospect.get("gradesPitching") != []:
        pitch_grades = prospect.get("gradesPitching")
        pitch_fv = process_grades(year, grades_id, pitch_grades, "pitch", prospect_type)

    fv = max(hit_fv, pitch_fv)
    entry["FV"] = fv


    blurbs = prospect.get("prospectBio")
    sorted_blurbs = sorted(blurbs, key=lambda k:k["contentTitle"], reverse=True)
    cleaned_blurbs = []
    for i,b in enumerate(sorted_blurbs):
        if b.get("contentText") is None:
            sorted_blurbs[i] = None
        else:
            blurbtext = str(b.get("contentTitle")) + b.get("contentText").replace("<b>","").replace("</b>","").replace("<br />","").replace("<p>","\n").replace("</p>","").replace("*","").replace("<strong>","").replace("</strong>","")
            blurbtext = "".join([j if ord(j) < 128 else "" for j in blurbtext])
            cleaned_blurbs.append(blurbtext)

    blurb = "\n\n".join(cleaned_blurbs)
    entry["blurb"] = blurb

    # raw_input(entry)
    return entry
def current_series(year, timestamp):
    print '\tdetermining current series probabilities'

    games_query = "SELECT IFNULL(SUM(IF(winning_team IS NOT NULL,1,0)),0) FROM __in_playoff_game_results WHERE year = %s;" % (
        year)

    total_playoff_games_played = db.query(games_query)[0][0]

    qry = """SELECT 
    series_id, year, strength_type, 
    team, opponent,
    series_wins, series_losses
    FROM __in_playoff_bracket
    WHERE update_time = (SELECT MAX(update_time) FROM __in_playoff_bracket)
    AND year = %s;"""

    query = qry % (year)

    res = db.query(query)

    for row in res:
        series_id, year, strength_type, team, opponent, series_wins, series_losses = row

        series_type = series_id.replace('AL', '').replace('NL', '')[:2]
        games_dict = {'WC': 1, 'DS': 5, 'CS': 7, 'WS': 7}
        series_games = games_dict.get(series_type)

        team_abb = helper.get_team_abb(team, year)
        oppn_abb = helper.get_team_abb(opponent, year)

        team_winProb = get_single_game_win_prob(team_abb, oppn_abb,
                                                strength_type, year)

        entry = {
            'update_time': timestamp,
            'series_id': series_id,
            'year': year,
            'team': team,
            'opponent': opponent,
            'series_wins': series_wins,
            'series_losses': series_losses,
            'strength_type': strength_type,
            'team_winProb': team_winProb,
            'total_playoff_games_played': total_playoff_games_played
        }

        team_probs = []

        if series_wins == series_games / 2 + 1:
            team_probs.append(1)
            total_games = series_wins + series_losses
            if total_games > 2:
                colName = 'team_in' + str(total_games)
                entry[colName] = 1

        if series_losses == series_games / 2 + 1:
            team_probs.append(0)

        if (series_wins != series_games / 2 + 1
                and series_losses != series_games / 2 + 1):
            for end_game in range(series_games / 2 + 1,
                                  series_games + 1 - series_losses):
                team_in_N = BinomDist.pmf(n=end_game - 1 - series_wins,
                                          k=(series_games / 2 - series_wins),
                                          p=team_winProb) * team_winProb

                col_name = 'team_in' + str(end_game + series_losses)

                team_probs.append(team_in_N)

                if end_game > 2:
                    entry[col_name] = team_in_N

        entry['team_seriesProb'] = sum(team_probs)

        db.insertRowDict(entry,
                         '__in_playoff_bracket',
                         insertMany=False,
                         replace=True,
                         rid=0,
                         debug=1)
        db.conn.commit()
def process_wc2(year):
    print "wc2"
    for _type in ('roster', 'projected'):
        print '\t', _type
        for conf in ('AL', 'NL'):

            team_query = "SELECT team_abb, team_name, (win_division+wc_1), mean_W/162.0, var, year, games_played FROM __playoff_probabilities JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 USING (team_abb, year, games_played) WHERE strength_type = '%s' AND LEFT(division,2) = '%s'AND year = %s" % (
                _type, conf, year)

            team_res = db.query(team_query)

            wc2_dict = {}
            for team_row in team_res:
                team_abb, team_name, po_prob, strength_pct, var, year, games_played = team_row
                print '\t\t', team_name

                if games_played > 162:
                    strength_pct = float(
                        (float(strength_pct) * 162.0) / float(games_played))
                else:
                    strength_pct = float(strength_pct)

                division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                    team_name, year)

                div_winners_qry = """SELECT 
                p1.team_name,
                p2.team_name,
                p3.team_name,
                p4.team_name,
                (p1.win_division+p1.wc_1)*(p2.win_division+p2.wc_1)*(p3.win_division+p3.wc_1)*(p4.win_division+p4.wc_1)
                FROM __playoff_probabilities p1
                JOIN __playoff_probabilities p2
                JOIN __playoff_probabilities p3
                JOIN __playoff_probabilities p4
                JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t1 ON (p1.team_abb=t1.team_abb AND p1.year=t1.year AND  p1.games_played=t1.games_played)
                JOIN (SELECT team_abb, MAX(YEAR) AS YEAR, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 ON (p2.team_abb=t2.team_abb AND p2.year=t2.year AND  p2.games_played=t2.games_played)
                JOIN (SELECT team_abb, MAX(YEAR) AS YEAR, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t3 ON (p3.team_abb=t3.team_abb AND p3.year=t3.year AND  p3.games_played=t3.games_played)
                JOIN (SELECT team_abb, MAX(YEAR) AS YEAR, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t4 ON (p4.team_abb=t4.team_abb AND p4.year=t4.year AND  p4.games_played=t4.games_played)
                WHERE 1
                AND p1.strength_type = '%s'
                AND p2.strength_type = '%s'
                AND p3.strength_type = '%s'
                AND p4.strength_type = '%s'
                AND p1.division = '%s West'
                AND p2.division = '%s Central'
                AND p3.division = '%s East'
                AND LEFT(p4.division,2) = '%s'
                AND p1.team_name != '%s'
                AND p2.team_name != '%s'
                AND p3.team_name != '%s'
                AND p4.team_name != '%s'
                AND p1.team_name != p4.team_name
                AND p2.team_name != p4.team_name
                AND p3.team_name != p4.team_name
                AND p1.year = %s
                AND p2.year = %s
                AND p3.year = %s
                AND p4.year = %s;"""

                div_winners_query = div_winners_qry % (
                    _type, _type, _type, _type, conf, conf, conf, conf,
                    team_name, team_name, team_name, team_name, year, year,
                    year, year)
                div_winners_res = db.query(div_winners_query)

                wc2_pre_prob = float(0.0)
                for div_row in div_winners_res:
                    div1_team, div2_team, div3_team, div4_team, situation_prob = div_row

                    set_teams = []
                    for tm in conf_teams:
                        if tm not in (div1_team, div2_team, div3_team,
                                      div4_team):
                            set_teams.append(tm)

                    win_wc2_prob = np.prod(
                        get_probabilities(team_name, set_teams, strength_pct,
                                          games_played, float(var), _type,
                                          year)[0])

                    wc2_pre_prob += (float(situation_prob) *
                                     float(win_wc2_prob))

                wc2_pre_prob = wc2_pre_prob * (1.0 - float(po_prob))
                wc2_dict[team_name] = [
                    wc2_pre_prob, (1.0 - float(po_prob)), False, year,
                    games_played
                ]

            col_name = 'wc_2'
            adjust_probabilities(wc2_dict, col_name, 1.0, _type)
def register_war(year):
    """Compute defensive WAR for each register batter in `year` and store it.

    One row per player/team/position is written to
    processed_compWAR_defensive. Before 2011 the defensive runs value is
    fixed at 0.0; from 2011 on it comes from helper.get_def_values, prorated
    by plate appearances with 600 PA counted as a full season.

    Args:
        year: Season to process.
    """
    register_sql = """SELECT
    player_name,
    team_abb,
    position,
    age,
    pa
    FROM register_batting_primary
    JOIN register_batting_secondary USING (year, player_name, team_abb, position, age)
    JOIN register_batting_analytical USING (year, player_name, team_abb, position, age)
    WHERE year = %s;
    """
    rows = db.query(register_sql % (year))

    records = []
    for player_name, team_abb, position, age, pa in rows:
        pa = float(pa)

        # Drop a single trailing '*' or '#' marker from the name.
        has_marker = player_name[-1] in ('*', '#')
        bare_name = player_name[:-1] if has_marker else player_name

        if year < 2011:
            defense = 0.0
        else:
            # Double any apostrophe (d'Arnaud -> d''Arnaud) for the lookup.
            lookup_name = bare_name.replace("'", "''")
            rn_val, err_val, arm_val, pb_val = helper.get_def_values(
                lookup_name, position, year)
            # 600 PA is a full season.
            defense = pa * (rn_val + err_val + arm_val + pb_val) / 600

        full_season_adj = float(helper.get_pos_adj(position.upper()))
        position_adj = full_season_adj * (pa / 600)

        records.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'position': position,
            'age': age,
            'pa': pa,
            'inn': None,
            'defense': defense,
            'position_adj': position_adj,
            'dWAR': (defense + position_adj) / 10.0,
        })

    if records:
        db.insertRowDict(records, 'processed_compWAR_defensive',
                         replace=True, insertMany=True, rid=0)
    db.conn.commit()
Exemple #18
0
def batters(year):

    player_q = """SELECT a.year
    , IFNULL(CONCAT(nm.right_fname, ' ', nm.right_lname), a.Player) AS player
    , a.team_abb
    , a.age
    , a.B as hand
    , a.PO
    , COALESCE(a.PA, c.PA) AS pa
    , a.ab
    , a.h
    , a.2b
    , a.3b
    , a.hr
    , a.bb
    , a.so
    , a.sb
    , a.cs
    , BA
    , OBP
    , SLG
    , BABIP
    , OPS_Plus
    , DEF
    , c.WAR
    , cv.yr1_WAR
    , cv.yr1_value
    , cv.yr2_WAR
    , cv.yr2_value
    , cv.yr3_WAR
    , cv.yr3_value
    , cv.yr4_WAR
    , cv.yr4_value
    , cv.yr5_WAR
    , cv.yr5_value
    , cv.yr6_WAR
    , cv.yr6_value
    , cv.yr7_WAR
    , cv.yr7_value
    , cv.yr8_WAR
    , cv.yr8_value
    FROM zips_fangraphs_batters_counting a
    JOIN(
        SELECT year
        , Player
        , MAX(post_date) AS post_date
        FROM zips_fangraphs_batters_counting
        WHERE 1
            AND year = %s
        GROUP BY year, Player
    ) b USING (year,Player,post_date)
    LEFT JOIN zips_fangraphs_batters_rate c USING (year, Player, team_abb)
    LEFT JOIN name_mapper nm ON (1
        AND a.Player = nm.wrong_name
        AND (nm.start_year IS NULL OR nm.start_year <= a.year)
        AND (nm.end_year IS NULL OR nm.end_year >= a.year)
        AND (nm.position = '' OR nm.position = a.PO)
        AND (nm.rl_team = '' OR nm.rl_team = a.team_abb)
        # AND (nm.nsbl_team = '' OR nm.nsbl_team = rbp.team_abb)
    )
    LEFT JOIN name_mapper nm2 ON (nm.right_fname = nm2.right_fname
        AND nm.right_lname = nm2.right_lname
        AND (nm.start_year IS NULL OR nm.start_year = nm2.start_year)
        AND (nm.end_year IS NULL OR nm.end_year = nm2.end_year)
        AND (nm.position = '' OR nm.position = nm2.position)
        AND (nm.rl_team = '' OR nm.rl_team = nm2.rl_team)
    )
    JOIN zips_FA_contract_value_batters cv ON (a.year = cv.year 
        AND a.team_abb = cv.team_abb
        AND IFNULL(nm2.wrong_name, a.Player) = cv.Player
    ) 
    ;"""
    player_qry = player_q % (year)
    # raw_input(player_qry)
    player_data = db.query(player_qry)

    entries = []
    for row in player_data:
        entry = {}
        year, player_name, team_abb, age, hand, po, pa, ab, h, _2, _3, hr, bb, so, sb, cs, ba, obp, slg, babip, zOPS_Plus, DEF, WAR, yr1_WAR, yr1_value, yr2_WAR, yr2_value, yr3_WAR, yr3_value, yr4_WAR, yr4_value, yr5_WAR, yr5_value, yr6_WAR, yr6_value, yr7_WAR, yr7_value, yr8_WAR, yr8_value = row

        if pa is None:
            pa = ab + bb
        bb2 = bb
        hbp = 0
        _1 = h - _2 - _3 - hr

        team_abb = team_abb.upper()

        pf = float(helper.get_park_factors(team_abb, year - 1)) / float(100)

        if po.lower() != 'c':
            scaledWAR = 600 * (float(WAR) / float(pa))
        else:
            scaledWAR = 450 * (float(WAR) / float(pa))

        ops, wOBA, park_wOBA, OPS_plus, wrc, wrc27, wRC_plus, raa, oWAR = helper.get_zips_offensive_metrics(
            year - 1, pf, pa, ab, bb2, hbp, _1, _2, _3, hr, sb, cs)

        entry['year'] = year
        entry['player_name'] = player_name
        entry['team_abb'] = team_abb
        entry['age'] = age
        entry['hand'] = hand
        entry['pos'] = po
        entry['pf'] = pf
        entry['pa'] = pa
        entry['ba'] = ba
        entry['obp'] = obp
        entry['slg'] = slg
        entry['zOPS_Plus'] = zOPS_Plus
        entry['DEF'] = DEF
        entry['zWAR'] = WAR
        entry['babip'] = babip
        entry['OPS_plus'] = OPS_plus
        entry['park_wOBA'] = park_wOBA
        entry['wRC_plus'] = wRC_plus
        entry['scaledWAR'] = scaledWAR
        entry['yr1_WAR'] = yr1_WAR
        entry['yr1_value'] = yr1_value
        entry['yr2_WAR'] = yr2_WAR
        entry['yr2_value'] = yr2_value
        entry['yr3_WAR'] = yr3_WAR
        entry['yr3_value'] = yr3_value
        entry['yr4_WAR'] = yr4_WAR
        entry['yr4_value'] = yr4_value
        entry['yr5_WAR'] = yr5_WAR
        entry['yr5_value'] = yr5_value
        entry['yr6_WAR'] = yr6_WAR
        entry['yr6_value'] = yr6_value
        entry['yr7_WAR'] = yr7_WAR
        entry['yr7_value'] = yr7_value
        entry['yr8_WAR'] = yr8_WAR
        entry['yr8_value'] = yr8_value

        entries.append(entry)

    table = 'zips_fangraphs_prep_FA_batters'
    print table
    if entries != []:
        for i in range(0, len(entries), 1000):
            db.insertRowDict(entries[i:i + 1000],
                             table,
                             insertMany=True,
                             replace=True,
                             rid=0,
                             debug=1)
            db.conn.commit()
def process_players(player_list, year, season_gp, team_name, team_abb, date):
    """Turn one team's roster sheet rows into excel_rosters records.

    Section header rows ('Pitchers', 'Catchers', 'Infield', 'Outfield') set
    the position applied to the rows that follow. A row is kept when its
    contract column is 'MLI' or when it carries a positive salary or expiry
    value. Any row that raises IndexError or ValueError while being read is
    skipped.

    Args:
        player_list: Iterable of roster rows (sequences of cell values).
        year: Season stored on every record.
        season_gp: Games-played value stored as 'gp'.
        team_name: Accepted but not used here.
        team_abb: Team abbreviation stored on every record.
        date: Date stored on every record.
    """
    section_positions = {
        'Pitchers': 'p',
        'Catchers': 'c',
        'Infield': 'if',
        'Outfield': 'of',
    }
    counted_contracts = ('v', 'ce', '4th', '5th', '6th')

    records = []
    pos = ''
    for plr in player_list:
        if plr == []:
            continue

        # A header row switches the position; any other row keeps it.
        pos = section_positions.get(plr[0], pos)

        try:
            is_mli = plr[1] == 'MLI'
            if not is_mli:
                has_money = float(plr[2]) > 0 or float(plr[3]) > 0
                if not (has_money and plr[2] != ''):
                    continue

            entered_name = plr[0]
            # Hard-coded disambiguation for the catcher named Will Smith.
            if pos == 'c' and entered_name == 'Smith, Will':
                entered_name = 'D. Smith, Will'
            player_name, first_name, last_name = name_parser(entered_name)

            contract_year = plr[1]
            # Hard-coded contract override for this player.
            if player_name == 'Max Stassi':
                contract_year = 'V'

            salary = 1.1 if is_mli else plr[2]

            # Trailing columns are optional; use defaults when absent.
            cell_count = len(plr)
            expires = plr[3] if cell_count >= 4 else 0
            opt = plr[4] if cell_count >= 5 else ''
            ntc = plr[5] if cell_count >= 6 else None

            if (contract_year.lower() in counted_contracts
                    or contract_year[-1] == 'G'):
                salary_counted = 'Y'
            else:
                salary_counted = 'N'

            record = {
                'year': year,
                'gp': season_gp,
                'position': pos,
                'team_abb': team_abb,
                'date': date,
                'player_name': player_name,
                'fname': first_name,
                'lname': last_name,
                'entered_name': entered_name,
                'contract_year': contract_year,
                'salary': salary,
                'expires': expires,
                'opt': opt,
                'ntc': ntc,
                'salary_counted': salary_counted,
            }

            helper.input_name(player_name)
            records.append(record)

        except (IndexError, ValueError):
            # Short or non-numeric rows are not player rows.
            continue

    if records:
        db.insertRowDict(records,
                         'excel_rosters',
                         replace=True,
                         insertMany=True,
                         rid=0)
    db.conn.commit()
Exemple #20
0
def scrape_prospects(year, prospect_lists):

    list_cnt = 0
    for list_type in (prospect_lists):
        entries = []
        if list_type not in ("rule5", "prospects", "pdp", "rhp", "lhp", "c",
                             "1b", "2b", "3b", "ss", "of"):
            # if list_type in ("draft","int"):
            list_cnt += 1
            ind_list = prospect_lists[list_type]

            i = 0
            for player in ind_list:
                entry = {}
                i += 1
                sleep(sleep_time)
                mlb_id = player["player_id"]
                player_url = player_base_url % (year, mlb_id)

                print list_cnt, year, list_type, i, "\t", str(mlb_id)
                print "\t\t", str(player_url)

                sleep(sleep_time)
                player_json = getter.get_url_data(player_url, "json")

                try:
                    player_info = player_json["prospect_player"]
                except TypeError:
                    print "\n\n**ERROR TAG** TYPE_ERROR", str(year), str(
                        mlb_id), "\n\n"
                    continue

                fname = player_info["player_first_name"]
                lname = player_info["player_last_name"]
                input_name = fname + ' ' + lname
                helper2.input_name(input_name)
                fname, lname = helper.adjust_mlb_names(mlb_id, fname, lname)

                position = player_info["positions"]
                position = helper.adjust_mlb_positions(mlb_id, position)

                entry["year"] = year
                entry["rank"] = i
                entry["mlb_id"] = mlb_id
                entry["fname"] = fname
                entry["lname"] = lname
                entry["position"] = position

                if list_type in ("int", "draft"):
                    bats = player_info["bats"]
                    throws = player_info["thrw"]
                    try:
                        height = player_info["height"].replace("\"",
                                                               "").split("\"")
                        height = int(height[0]) * 12 + int(height[1])
                    except (IndexError, ValueError, AttributeError):
                        height = None
                    weight = player_info["weight"]
                    try:
                        dob = player_info["birthdate"]
                        byear = dob.split("/")[2]
                        bmonth = dob.split("/")[0]
                        bday = dob.split("/")[1]
                    except IndexError:
                        print '\n\nNO BIRTHDAY', fname, lname, mlb_id, "\n\n"
                        continue

                    byear, bmonth, bday = helper.adjust_mlb_birthdays(
                        mlb_id, byear, bmonth, bday)

                    prospect_id = helper.add_prospect(mlb_id,
                                                      fname,
                                                      lname,
                                                      byear,
                                                      bmonth,
                                                      bday,
                                                      p_type=list_type)

                else:
                    info_url = player2_base_url % mlb_id
                    print "\t\t", info_url

                    sleep(sleep_time)
                    info_json = getter.get_url_data(info_url,
                                                    "json",
                                                    json_unicode_convert=True)
                    try:
                        info_info = info_json["player_info"]["queryResults"][
                            "row"]
                    except TypeError:
                        print "\n\n**ERROR TAG** MLB_ERROR", str(year), str(
                            mlb_id), str(fname), str(lname), "\n\n"
                        continue

                    dob = info_info["birth_date"]
                    byear = dob.split("-")[0]
                    bmonth = dob.split("-")[1]
                    bday = dob.split("-")[2].split("T")[0]

                    prospect_id = helper.add_prospect(mlb_id,
                                                      fname,
                                                      lname,
                                                      byear,
                                                      bmonth,
                                                      bday,
                                                      p_type="professional")

                    try:
                        bats = info_info["bats"]
                        throws = info_info["throws"]
                        height = int(info_info["height_feet"]) * 12 + int(
                            info_info["height_inches"])
                        weight = int(info_info["weight"])
                    except UnicodeDecodeError:
                        bats, throws, height, weight = (None, None, None, None)
                    except ValueError:
                        print "\n\n**ERROR TAG** MLB_ERROR", str(year), str(
                            mlb_id), str(fname), str(lname), "\n\n"
                        continue

                if prospect_id == 0 or prospect_id is None:
                    grades_id = mlb_id
                else:
                    grades_id = prospect_id

                entry["prospect_id"] = prospect_id
                entry["grades_id"] = grades_id
                entry["bats"] = bats
                entry["throws"] = throws
                entry["height"] = height
                entry["weight"] = weight
                entry["birth_year"] = byear
                entry["birth_month"] = bmonth
                entry["birth_day"] = bday

                entry["team"] = player["team_file_code"]
                drafted = player_info["drafted"]

                if list_type == "int":
                    drafted = None
                    try:
                        sign_text = player_info["signed"]
                        sign_value = sign_text.split(" - ")[1]
                        signed = sign_value
                    except IndexError:
                        signed = ""
                    try:
                        signed = int(signed.replace("$", "").replace(",", ""))
                    except ValueError:
                        signed = None

                    schoolcity = player_info["school"]
                    gradecountry = player_info["year"]
                    commit = None

                elif list_type == "draft":
                    try:
                        signed = player_info["preseason20"].replace(
                            " ", "").replace(",", "").replace("$",
                                                              "").split("-")[1]
                    except (KeyError, IndexError):
                        signed = player_info["signed"].replace(
                            " ", "").replace(",", "").replace("$", "")
                    try:
                        signed = int(signed)
                    except ValueError:
                        signed = None
                    schoolcity = player_info["school"]
                    gradecountry = player_info["year"]
                    commit = player_info["signed"]
                else:
                    signed = player_info["signed"]
                    schoolcity = None
                    gradecountry = None
                    commit = None

                entry["drafted"] = drafted
                entry["signed"] = signed
                entry["school_city"] = schoolcity
                entry["grade_country"] = gradecountry
                entry["college_commit"] = commit

                if list_type not in ("int", "draft"):
                    eta = player_info["eta"]
                    try:
                        pre_top100 = player_info["preseason100"]
                    except KeyError:
                        pre_top100 = None
                else:
                    pre_top100 = None
                    eta = None

                entry["pre_top100"] = pre_top100
                entry["eta"] = eta

                entry["twitter"] = player_info["twitter"]

                blurb = player_info["content"]["default"].replace(
                    "<b>",
                    "").replace("</b>", "").replace("<br />", "").replace(
                        "<p>", "").replace("</p>", "").replace("*", "")
                entry["blurb"] = blurb

                try:
                    overall_text = blurb.split("Overall")[1].split(
                        '\n')[0].replace(':', '').replace(' ', '')[:8]
                    if overall_text[0] not in (' ', ':', '0', '1', '2', '3',
                                               '4', '5', '6', '7', '8', '9'):
                        raise IndexError

                    try:
                        text2 = overall_text.split('/')[1]
                    except IndexError:
                        text2 = overall_text.split('/')[-1]

                    overall = int(filter(str.isdigit, text2[:2]))
                except IndexError:
                    overall = 0

                if overall < 20 and overall is not None:
                    overall = overall * 10
                entry["FV"] = overall

                entries.append(entry)

        if list_type == "draft":
            table = "mlb_prospects_draft"
        elif list_type == "int":
            table = "mlb_prospects_international"
        else:
            table = "mlb_prospects_professional"

        for e in entries:
            raw_input(e)
Exemple #21
0
def pitchers(year):
    """Build the ``zips_fangraphs_prep_FA_pitchers`` rows for one season.

    Reads the most recently posted ZiPS counting line per pitcher for
    ``year`` (joined to the rate table, the name mapper and the free-agent
    contract values), derives the role, K/BB, FIP and park-adjusted FIP/ERA
    metrics through ``helper``, and writes the result in batches of 1000
    rows, committing after each batch.

    Park factors and league constants are looked up for the season before the
    row's year (``year - 1``).  WAR for starters is rescaled to 32 starts.
    """
    # NOTE(review): the nm2 join conditions compare nm2 columns with themselves
    # (e.g. ``nm2.start_year = nm2.start_year``); confirm whether
    # ``nm.<col> = nm2.<col>`` was intended.
    player_q = """SELECT a.year
    , IFNULL(CONCAT(nm.right_fname, ' ', nm.right_lname), a.Player) AS player
    , a.team_abb
    , a.age
    , T as hand
    , ERA
    , a.G
    , a.GS
    , IP
    , H
    , ER
    , HR
    , BB
    , SO
    , k_9
    , bb_9
    , hr_9
    , bb_pct
    , k_pct
    , BABIP
    , ERA_Plus
    , ERA_minus
    , COALESCE(a.FIP, c.FIP) AS FIP
    , c.WAR
    , cv.yr1_WAR
    , cv.yr1_value
    , cv.yr2_WAR
    , cv.yr2_value
    , cv.yr3_WAR
    , cv.yr3_value
    , cv.yr4_WAR
    , cv.yr4_value
    , cv.yr5_WAR
    , cv.yr5_value
    , cv.yr6_WAR
    , cv.yr6_value
    , cv.yr7_WAR
    , cv.yr7_value
    , cv.yr8_WAR
    , cv.yr8_value
    FROM zips_fangraphs_pitchers_counting a
    JOIN(
        SELECT year
        , Player
        , MAX(post_date) AS post_date
        FROM zips_fangraphs_pitchers_counting
        WHERE 1
            AND year = %s
        GROUP BY year, Player
    ) b USING (year,Player,post_date)
    LEFT JOIN zips_fangraphs_pitchers_rate c USING (year, Player, team_abb)
    LEFT JOIN name_mapper nm ON (1
        AND a.Player = nm.wrong_name
        AND (nm.start_year IS NULL OR nm.start_year <= a.year)
        AND (nm.end_year IS NULL OR nm.end_year >= a.year)
        # AND (nm.position = '' OR nm.position = a.PO)
        AND (nm.rl_team = '' OR nm.rl_team = a.team_abb)
        # AND (nm.nsbl_team = '' OR nm.nsbl_team = rbp.team_abb)
    )
    LEFT JOIN name_mapper nm2 ON (nm.right_fname = nm2.right_fname
        AND nm.right_lname = nm2.right_lname
        AND (nm.start_year IS NULL OR nm2.start_year = nm2.start_year)
        AND (nm.end_year IS NULL OR nm2.end_year = nm2.end_year)
        AND (nm.position = '' OR nm2.position = nm2.position)
        AND (nm.rl_team = '' OR nm2.rl_team = nm2.rl_team)
    )
    JOIN zips_FA_contract_value_pitchers cv ON (a.year = cv.year 
        AND a.team_abb = cv.team_abb
        AND IFNULL(nm2.wrong_name, a.Player) = cv.Player
    )
    ;"""

    player_data = db.query(player_q % (year))

    entries = []
    for row in player_data:
        # The row's own year replaces the argument from here on.
        (year, player_name, team_abb, age, hand, era, g, gs, ip, h, er, hr, bb,
         k, k_9, bb_9, hr_9, bb_pct, k_pct, babip, zera_plus, zera_minus, zfip,
         zwar, yr1_WAR, yr1_value, yr2_WAR, yr2_value, yr3_WAR, yr3_value,
         yr4_WAR, yr4_value, yr5_WAR, yr5_value, yr6_WAR, yr6_value, yr7_WAR,
         yr7_value, yr8_WAR, yr8_value) = row

        # Starter: at least 20 starts, or more than 80% of games started.
        role = 'SP' if (gs >= 20 or float(gs) / float(g) > 0.8) else 'RP'

        team_abb = team_abb.upper()
        prior_year = year - 1

        pf = float(helper.get_park_factors(team_abb, prior_year)) / float(100)

        # K/BB, with 99.0 standing in for "strikeouts but no walks".
        walks = float(bb)
        strikeouts = float(k)
        if walks != 0:
            k_bb = strikeouts / walks
        elif strikeouts > 0:
            k_bb = 99.0
        else:
            k_bb = 0.0

        fip_const = float(
            helper.get_zips_average_pitchers(prior_year, 'fip_const'))
        FIP = (13 * float(hr) + 3 * float(bb) - 2 * float(k)) / float(ip) \
            + fip_const
        park_FIP, FIP_min, FIP_WAR = helper.get_zips_pitching_metrics(
            FIP, ip, prior_year, pf, g, gs, 'fip')

        park_ERA, ERA_min, ERA_WAR = helper.get_zips_pitching_metrics(
            float(era), ip, prior_year, pf, g, gs, 'era')

        if role == 'SP':
            # Express starter WAR per 32 starts.
            starts = float(gs)
            FIP_WAR = 32 * (float(FIP_WAR) / starts)
            ERA_WAR = 32 * (float(ERA_WAR) / starts)
        else:
            FIP_WAR = float(FIP_WAR)
            ERA_WAR = float(ERA_WAR)

        if k_pct is None or bb_pct is None:
            k_minus_bb_pct = None
        else:
            k_minus_bb_pct = float(k_pct) - float(bb_pct)

        entries.append({
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'age': age,
            'hand': hand,
            'pos': role,
            'pf': pf,
            'g': g,
            'gs': gs,
            'ip': ip,
            'babip': babip,
            'k_9': k_9,
            'bb_9': bb_9,
            'k_bb': k_bb,
            'hr_9': hr_9,
            'k_pct': k_pct,
            'bb_pct': bb_pct,
            'k_minus_bb_pct': k_minus_bb_pct,
            'zERA_plus': zera_plus,
            'zERA_minus': zera_minus,
            'zFIP': zfip,
            'zWAR': zwar,
            'FIP': FIP,
            'park_FIP': park_FIP,
            'FIP_minus': FIP_min,
            'FIP_WAR': FIP_WAR,
            # The stored ERA is the value as read from the query.
            'ERA': era,
            'park_ERA': park_ERA,
            'ERA_minus': ERA_min,
            'ERA_WAR': ERA_WAR,
            'yr1_WAR': yr1_WAR,
            'yr1_value': yr1_value,
            'yr2_WAR': yr2_WAR,
            'yr2_value': yr2_value,
            'yr3_WAR': yr3_WAR,
            'yr3_value': yr3_value,
            'yr4_WAR': yr4_WAR,
            'yr4_value': yr4_value,
            'yr5_WAR': yr5_WAR,
            'yr5_value': yr5_value,
            'yr6_WAR': yr6_WAR,
            'yr6_value': yr6_value,
            'yr7_WAR': yr7_WAR,
            'yr7_value': yr7_value,
            'yr8_WAR': yr8_WAR,
            'yr8_value': yr8_value,
        })

    table = 'zips_fangraphs_prep_FA_pitchers'
    print(table)
    # Insert in slices of 1000 rows; an empty list yields no slices.
    for start in range(0, len(entries), 1000):
        db.insertRowDict(entries[start:start + 1000],
                         table,
                         insertMany=True,
                         replace=True,
                         rid=0,
                         debug=1)
        db.conn.commit()
def process_basic(year):
    """Seed ``__playoff_probabilities`` with per-team win distributions.

    For each strength type ('roster', then 'projected') the ``__team_strength``
    rows selected by the query for ``year`` are read, and one row per team is
    inserted (with replace) and committed.

    * 'roster' rows use roster_W / roster_L / roster_pct together with the
      full ``overall_var``.
    * 'projected' rows use projected_W / projected_L with ``ros_pct``; their
      variance and standard deviation are scaled by the share of a 162-game
      season that is still unplayed, floored at 0.001.

    p_95 / p_05 are the mean plus/minus 1.96 standard deviations and
    p_75 / p_25 the mean plus/minus 1.15 standard deviations.
    """
    print('initial table setup')
    for _type in ('roster', 'projected'):

        basic_query = """SELECT
        team_abb, team_name,
        year, season_gp, games_played, current_W, current_L,
        overall_var,
        roster_W, roster_L, roster_pct,
        ros_W, ros_L, ros_pct,
        projected_W, projected_L, projected_pct
        FROM __team_strength t1
        JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __team_strength GROUP BY team_abb, year) t2 USING (team_abb, year, games_played)
        WHERE year = %s;"""

        basic_res = db.query(basic_query % (year))

        for basic_row in basic_res:
            # ``year`` is rebound to the value stored on the row.
            (team_abb, team_name, year, season_gp, games_played, cur_W, cur_L,
             overall_var, roster_W, roster_L, roster_pct, ros_W, ros_L,
             ros_pct, projected_W, projected_L,
             projected_pct) = basic_row

            games_played = float(games_played)
            # Fraction of the season left: 1.0 at game 0, 0.0 at game 162.
            remaining_share = float(max(0.0, 162.0 - games_played)) / 162.0

            projected_var = max(0.001, float(overall_var) * remaining_share)
            full_std = math.sqrt(float(overall_var))
            projected_std = max(0.001, full_std * remaining_share)

            division, div_teams, conf_teams, non_conf_teams = \
                helper.get_division(team_name, year)

            if _type == 'roster':
                center, spread = float(roster_W), full_std
                strength_pct, variance = roster_pct, overall_var
                mean_W, mean_L = roster_W, roster_L
            else:
                # 'projected'
                center, spread = float(projected_W), projected_std
                strength_pct, variance = ros_pct, projected_var
                mean_W, mean_L = projected_W, projected_L

            entry = {
                'team_abb': team_abb,
                'team_name': team_name,
                'year': year,
                'season_gp': season_gp,
                'games_played': games_played,
                'division': division,
                'strength_type': _type,
                'strength_pct': strength_pct,
                'var': variance,
                'mean_W': mean_W,
                'mean_L': mean_L,
                'p_95': center + 1.96 * spread,
                'p_75': center + 1.15 * spread,
                'p_25': center - 1.15 * spread,
                'p_05': center - 1.96 * spread,
            }

            db.insertRowDict(entry,
                             '__playoff_probabilities',
                             insertMany=False,
                             replace=True,
                             rid=0,
                             debug=1)
            db.conn.commit()
Exemple #23
0
    "zips_fangraphs_pitchers_counting": "a.Player",
    "zips_fangraphs_pitchers_rate": "a.Player",
    "zips_offense": "a.player_name",
    "zips_offense_splits": "a.player_name",
    "zips_pitching": "a.player_name",
    "zips_pitching_splits": "a.player_name",
    "mlb_prospects.fg_raw": "a.playerName",
    "mlb_prospects.minorleagueball_professional": "a.full_name",
    "mlb_prospects.mlb_prospects_draft": "CONCAT(a.fname, ' ', a.lname)",
    "mlb_prospects.mlb_prospects_international":
    "CONCAT(a.fname, ' ', a.lname)",
    "mlb_prospects.mlb_prospects_professional": "CONCAT(a.fname, ' ', a.lname)"
}

# For every source table in table_dict (key: table name, value: the SQL
# expression that yields the player's name in that table), find the distinct
# names that have no matching name_mapper.wrong_name row and hand each one to
# helper.input_name.
for k, v in table_dict.items():
    print k
    # Anti-join: the LEFT JOIN plus "nm.wrong_name IS NULL" keeps only names
    # that are absent from name_mapper.
    qry = """
    SELECT DISTINCT %s
    FROM %s a
    LEFT JOIN name_mapper nm ON (%s = nm.wrong_name)
    WHERE 1
        AND nm.wrong_name IS NULL
    """ % (v, k, v)

    # raw_input(qry)

    names = db.query(qry)

    # Each result row has a single column: the name expression.
    for name in names:
        helper.input_name(name[0])
def parse_player(player_text, year, team_abb):
    try:
        int(player_text[0:1])
    except ValueError:
        # raw_input(player_text)
        return None

    try:
        full_name = player_text.split(")")[1].split(",")[0].strip()
    except IndexError:
        return None

    try:
        team_rank = player_text.split(")")[0].strip()

    except IndexError:
        team_rank = None

    try:
        position = player_text.split(",")[1].split(",")[0].split(";")[0].strip().split(" ")[0].split(".")[0].strip()
    except IndexError:
        position = None

    try:
        grade_base = player_text.upper().split("GRADE")[1].split(":")[0].split(".")[0].split(";")[0]
        grade = grade_base.replace("/BORDERLINE","/").replace("BORDERLINE","/").replace("//","/").replace(" ","").strip()
    except IndexError:
        # raw_input(player_text)
        grade_base, grade = None, None


    try:
        age = player_text.lower().split(" age")[1].split(",")[0].split(";")[0].split(":")[0].split("(")[0].strip()
        age = int(age)
    except (IndexError, ValueError):
        try:
            age = player_text.lower().split("age")[1].split(",")[0].split(";")[0].strip()
            age = int(age)
        except (IndexError, ValueError):
            age = 0

    try:
        eta = player_text.lower().split(" eta")[1].split(".")[0].split(";")[0].split("(")[0].replace(":","").strip()
    except IndexError:
        eta = None

    entry = {"year":year, "team":team_abb}

    full_name, fname, lname = helper.adjust_minorleagueball_name(full_name, year, team_abb)

    est_birthyear = year - int(age)
    age = helper.adjust_minorleagueball_birthyear(full_name, year, team_abb, est_birthyear)

    position = helper.adjust_minorleagueball_position(full_name, year, team_abb, position)

    eta = helper.adjust_minorleagueball_eta(full_name, year, team_abb, eta)

    if grade is None:
        return None

    try:
        blurb = player_text.split("Grade"+grade_base+":")[1].strip()
    except (TypeError, IndexError):
        try:
            blurb = "Age " + player_text.split("Age")[1].strip()
        except (TypeError, IndexError):
            blurb = None

    try:
        grade_split = blurb.upper().split("BORDERLINE")[1].split(":")[0].split(".")[0].strip()[0:2].strip()
        if grade_split != "":
            grade = grade + "/" + grade_split
    except (IndexError, AttributeError):
        grade = grade

    grade = helper.adjust_minorleagueball_grade(full_name, year, team_abb, grade)

    if int(team_rank) == 31 and grade[0] in ("A", "B"):
        team_rank = 1

    entry["team_rank"] = team_rank
    entry["full_name"] = full_name
    entry["position"] = position
    entry["age"] = age
    entry["grade"] = grade
    entry["eta"] = eta
    entry["fname"] = fname
    entry["lname"] = lname
    entry["blurb"] = blurb
    print "\t\t", team_rank, full_name, position, age, grade, eta

    helper2.input_name(entry.get('full_name'))
    db.insertRowDict(entry, "minorleagueball_professional", replace=True, debug=1)
    db.conn.commit()
Exemple #25
0
def process():
    """Rebuild ``processed_team_standings_advanced`` from team WAR totals.

    The table is truncated, then for every ``processed_WAR_team`` row with a
    non-empty ``team_abb`` the team's latest ``team_standings`` record for
    that year is fetched (matched on the mascot name) and three win estimates
    are derived:

    * ``f_Wins`` / ``r_Wins``: replacement-level wins plus position-player
      WAR plus FIP-based or ERA-based pitching WAR.
    * ``py_Wins``: Pythagorean wins with exponent
      ``((RF + RA) / (W + L)) ** 0.285``.

    All rows are inserted in one call and the transaction is committed.
    """
    table = 'processed_team_standings_advanced'
    print(table)
    db.query("TRUNCATE TABLE `" + table + "`")

    teamWAR_qry = """SELECT
    year,
    team_abb,
    dWAR,
    oWAR,
    (replacement/10) as repWAR,
    FIP_WAR,
    ERA_WAR
    FROM processed_WAR_team
    """

    # Latest standings row for one team and season (parameters: mascot, year,
    # mascot, year).
    record_q = """SELECT
    year,
    team_name, 
    games_played, 
    w,
    l,
    rf,
    ra
    FROM team_standings
    WHERE team_name LIKE '%%%s%%'
    AND year = %s
    AND games_played = (SELECT MAX(games_played) FROM team_standings WHERE team_name LIKE '%%%s%%' AND year = %s)
    """

    entries = []
    for year, team_abb, dWAR, oWAR, repWAR, FIP_WAR, ERA_WAR in db.query(teamWAR_qry):
        mascot_name = helper.get_mascot_names(team_abb.upper(), year)

        #a full season is ~17 replacement wins?
        repWAR = float(repWAR)

        pos_WAR = float(dWAR) + float(oWAR) + repWAR
        fWAR = pos_WAR + float(FIP_WAR)
        rWAR = pos_WAR + float(ERA_WAR)

        # Rows without a team abbreviation are skipped.
        if team_abb == '':
            continue

        record = db.query(
            record_q % (mascot_name, year, mascot_name, year))[0]
        year, team_name, games_played, w, l, rf, ra = record

        # http://www.had2know.com/sports/pythagorean-expectation-win-percentage-baseball.html
        pythag_x = ((float(rf) + float(ra)) / (float(w) + float(l))) ** 0.285
        rf_pow = float(rf) ** pythag_x
        ra_pow = float(ra) ** pythag_x
        pythag_win_pct = rf_pow / (rf_pow + ra_pow)
        pythag_wins = (w + l) * pythag_win_pct
        pythag_losses = games_played - pythag_wins

        # Winning percentage assigned to a replacement-level team.
        rep_team_win_pct = 0.300 if year < 2017 else 0.325
        rep_team_wins = rep_team_win_pct * games_played

        f_wins = fWAR + rep_team_wins
        r_wins = rWAR + rep_team_wins

        entries.append({
            "year": year,
            "team_name": team_name,
            "games_played": games_played,
            "repWAR": repWAR,
            "oWAR": oWAR,
            "dWAR": dWAR,
            "FIP_WAR": FIP_WAR,
            "ERA_WAR": ERA_WAR,
            "RF": rf,
            "RA": ra,
            "f_Wins": f_wins,
            "f_Losses": games_played - f_wins,
            "r_Wins": r_wins,
            "r_Losses": games_played - r_wins,
            "py_Wins": pythag_wins,
            "py_Losses": pythag_losses,
            "W": w,
            "L": l
        })

    if entries:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()
Exemple #26
0
def process(curr_year):
    """Load every team sheet of the rosters workbook into ``excel_rosters``.

    Sheets 4 through 33 of the workbook are treated as team sheets.  On each
    sheet, rows from 8 up to the 'Waived Players' marker in column 1 are
    scanned; section header rows ('Pitchers', 'Catchers', 'Infielders',
    'Outfielders') set the position for the rows that follow, and every row
    with a contract year (column 2) and salary (column 3) becomes one entry.
    Each team's entries are inserted (with replace) and committed.

    ``gp`` on every entry is half of the ``gs`` value stored in
    ``processed_league_averages_pitching`` for ``curr_year``, or 0 when that
    season has no row.

    If a sheet has no 'Waived Players' marker within its first 100 rows, the
    whole sheet is scanned.
    """
    rosters_link = '/Users/connordog/Dropbox/Desktop_Files/Baseball/Rosters.xlsx'

    # Section header text in column 1 -> position code for following rows.
    section_positions = {
        'Pitchers': 'p',
        'Catchers': 'c',
        'Infielders': 'if',
        'Outfielders': 'of',
    }

    season_gp = db.query(
        "SELECT gs FROM processed_league_averages_pitching WHERE year = %s" %
        (curr_year))
    if not season_gp:
        season_gp = 0
    else:
        season_gp = float(season_gp[0][0]) / 2

    workbook = xlrd.open_workbook(rosters_link)
    sheet_names = workbook.sheet_names()

    # iterate through all team sheets
    for index in range(4, 34):
        team_name = sheet_names[index]

        print(team_name)

        team_abbs, primary_abb = helper.get_team_abbs(team_name.upper())

        entries = []

        team_sheet = workbook.sheet_by_index(index)

        # Find the last roster row for this sheet.  The bound is reset for
        # every sheet (so a missing marker can neither raise NameError nor
        # reuse the previous sheet's value), and the search never reads past
        # the end of the sheet.
        max_row = team_sheet.nrows
        for row in range(1, min(100, team_sheet.nrows)):
            if team_sheet.cell(row, 1).value == 'Waived Players':
                max_row = row
                break

        position = ''
        for row in range(8, max_row):
            entered_name = team_sheet.cell(row, 1).value
            # A section header switches the position for subsequent rows.
            position = section_positions.get(entered_name, position)

            # Disambiguate the catcher named Will Smith before name parsing.
            if position == 'c' and entered_name == 'Smith, Will':
                entered_name = 'D. Smith, Will'
            player_name, first_name, last_name = name_parser(
                entered_name, primary_abb)

            contract_year = team_sheet.cell(row, 2).value
            salary = team_sheet.cell(row, 3).value
            # Skip column-header rows and rows without contract data.
            if contract_year in ('Year', '') or salary in ('Salary', ''):
                continue

            entries.append({
                'year': curr_year,
                'gp': season_gp,
                'player_name': player_name,
                "fname": first_name,
                "lname": last_name,
                "team_abb": primary_abb,
                "position": position,
                "salary": salary,
                "contract_year": contract_year,
                "expires": team_sheet.cell(row, 4).value,
                "opt": team_sheet.cell(row, 5).value,
                "NTC": team_sheet.cell(row, 8).value,
                "salary_counted": team_sheet.cell(row, 9).value,
                "entered_name": entered_name
            })

        if entries:
            db.insertRowDict(entries,
                             'excel_rosters',
                             replace=True,
                             insertMany=True,
                             rid=0)
        db.conn.commit()
Exemple #27
0
def pitching_war(year):
    """Compute per-pitcher rate stats, FIP and WAR for one season.

    Reads every ``register_pitching_primary`` row for ``year``, derives K/9,
    BB/9, HR/9, K/BB and FIP, obtains park-adjusted FIP/ERA, their "minus"
    indices and WAR from ``helper.get_pitching_metrics``, and writes all rows
    (with replace) to ``processed_WAR_pitchers`` before committing.

    Sentinel values are used where a ratio is undefined:

    * 99.0 for BB/9 or HR/9 when a pitcher with no innings allowed walks or
      home runs, and for K/BB when there are strikeouts but no walks;
    * 99.99 for FIP when no innings were pitched;
    * 0.0 otherwise.
    """
    # NOTE(review): the ip expression presumably converts .1/.2 innings
    # notation into thirds of an inning -- confirm against the source data.
    player_q = """SELECT
    player_name,
    team_abb,
    position,
    age, 
    g, 
    gs,
    era,
    ROUND(ip) + (10 * (ip - ROUND(ip)) / 3) as ip,
    h, r, er, bb, k, hr
    FROM register_pitching_primary
    WHERE year = %s;
    """

    player_data = db.query(player_q % (year))

    entries = []
    for row in player_data:
        (player_name, team_abb, position, age, g, gs, era, ip, h, r, er, bb,
         k, hr) = row
        entry = {
            'year': year,
            'player_name': player_name,
            'team_abb': team_abb,
            'position': position,
            # Handedness is not available from this query.
            'throws': None,
            'age': age,
            'ip': ip,
        }

        team_abb = team_abb.upper()
        pf = float(helper.get_park_factors(team_abb, year)) / float(100)
        entry['pf'] = pf

        # K/BB depends only on strikeouts and walks, so it is computed the
        # same way whether or not any innings were recorded.  (Previously a
        # pitcher with walks and no innings was given the 99.0 sentinel that
        # is meant for "strikeouts without walks".)
        if bb == 0:
            k_bb = 99.0 if k > 0 else 0.0
        else:
            k_bb = float(k) / float(bb)

        if ip == 0:
            # Per-nine rates are undefined without innings.
            k_9 = 0.0
            bb_9 = 99.0 if bb > 0 else 0.0
            hr_9 = 99.0 if hr > 0 else 0.0
        else:
            innings = float(ip)
            k_9 = (float(k) / innings) * 9
            bb_9 = (float(bb) / innings) * 9
            hr_9 = (float(hr) / innings) * 9

        entry['k_9'] = k_9
        entry['bb_9'] = bb_9
        entry['k_bb'] = k_bb
        entry['hr_9'] = hr_9

        fip_const = float(helper.get_league_average_pitchers(
            year, 'fip_const'))
        if ip == 0:
            FIP = 99.99
        else:
            FIP = ((((13 * float(hr)) + (3 * float(bb)) -
                     (2 * float(k))) / float(ip)) + fip_const)
        entry['FIP'] = FIP

        park_FIP, FIP_min, FIP_WAR = helper.get_pitching_metrics(
            FIP, ip, year, pf, g, gs, 'fip')

        entry['park_FIP'] = park_FIP
        entry['FIP_minus'] = FIP_min
        entry['FIP_WAR'] = FIP_WAR

        ERA = float(era)
        entry['ERA'] = ERA

        park_ERA, ERA_min, ERA_WAR = helper.get_pitching_metrics(
            ERA, ip, year, pf, g, gs, 'era')

        entry['park_ERA'] = park_ERA
        entry['ERA_minus'] = ERA_min
        entry['ERA_WAR'] = ERA_WAR

        entries.append(entry)

    table = 'processed_WAR_pitchers'
    if entries:
        db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0)
    db.conn.commit()