Esempio n. 1
0
def create_hitter_valuations(league, stats):
    """Value a frame of hitter stats for the given league.

    The first z-score pass uses hitters above the 90th percentile of ``pa``
    as the reference sample; the second pass uses the hitters whose ``zar``
    came out positive in the first.

    Args:
        league: league settings object; ``league_name`` must be 'SoS' or
            'Legacy'. Its hitting stat lists pick the output columns.
        stats: DataFrame of hitter stats. It is modified (no copy is taken).

    Returns:
        DataFrame limited to the identifying columns, the league's hitting
        stats, ``zar`` and ``value``, with the previous index moved into a
        column by ``reset_index``.
    """
    from general import utilities
    from analysis import calculations
    from analysis import player_pool_stats

    assert league.league_name in ['SoS', 'Legacy']

    valued = stats
    valued['type'] = 'B'

    # Seed sample: only the heaviest-usage hitters (top decile by PA).
    pa_cutoff = valued['pa'].quantile(.9)
    valued['sample'] = valued['pa'] > pa_cutoff

    passes_left = 2
    while passes_left:
        valued = calculations.calc_z(df=valued, ls=league, type='batting')
        # Next pass measures everyone against above-replacement hitters.
        valued['sample'] = valued['zar'] > 0
        passes_left -= 1

    keep = utilities.flatten([
        'name', 'fg_id', 'team', 'type', 'elig', 'pa',
        league.hitting_counting_stats, league.hitting_rate_stats, 'zar',
        'value'
    ])
    valued = valued[keep]
    valued.reset_index(inplace=True)
    return valued
Esempio n. 2
0
def value_pool_hitting(league, player_pool, type):
    """Value a pool of hitters against itself.

    Runs two z-score passes: the first uses every player as the reference
    sample, the second uses only players whose ``zar`` was positive.

    Args:
        league: league settings object; its hitting stat lists pick the
            output columns.
        player_pool: DataFrame of hitter stats. A ``type`` and a ``sample``
            column are written onto it.
        type: 'B' or 'P'; stored in the ``type`` column.
            NOTE(review): the valuation itself always runs in hitting mode,
            even when 'P' is passed -- confirm that is intended.

    Returns:
        DataFrame with the identifying columns, the league's hitting stats,
        ``zar`` and ``value``.
    """
    from general import utilities
    from analysis import calculations

    assert type in ['B', 'P']
    player_pool['type'] = type

    # No plate-appearance threshold is applied: the first pass treats the
    # whole pool as the sample.
    player_pool['sample'] = True
    for _ in range(2):
        player_pool = calculations.calc_z(df=player_pool,
                                          ls=league,
                                          type='hitting')
        player_pool['sample'] = player_pool['zar'] > 0

    columns = utilities.flatten([
        'name', 'fg_id', 'type', 'elig', 'pa', league.hitting_counting_stats,
        league.hitting_rate_stats, 'zar', 'value'
    ])
    return player_pool[columns]
Esempio n. 3
0
def create_combined_pitcher_valuations(league):
    """Value the combined pitcher projections for the given league.

    Runs two z-score passes: the first uses every pitcher as the reference
    sample, the second uses only pitchers whose ``zar`` was positive.

    Args:
        league: league settings object; ``league_name`` must be 'SoS' or
            'Legacy'. Its pitching stat lists pick the output columns.

    Returns:
        DataFrame with the identifying columns, ``ip``, the league's pitching
        stats, ``zar``, ``value``, ``zar_skills``, ``rank_sp`` and
        ``rank_rp``. Each column appears once.
    """
    from general import utilities
    from analysis import calculations
    from analysis import player_pool_stats

    assert league.league_name in ['SoS', 'Legacy']
    combined_pitchers = player_pool_stats.create_combined_pitchers(league)
    combined_pitchers['type'] = 'P'

    combined_pitchers['sample'] = True
    for _ in range(2):
        combined_pitchers = calculations.calc_z(df=combined_pitchers,
                                                ls=league,
                                                type='pitching')
        combined_pitchers['sample'] = combined_pitchers['zar'] > 0

    columns = utilities.flatten([
        'name', 'fg_id', 'team', 'type', 'elig', 'ip',
        league.pitching_counting_stats, league.pitching_rate_stats, 'zar',
        'value', 'zar_skills', 'rank_sp', 'rank_rp'
    ])
    # 'ip' is listed explicitly and can also be one of the league's counting
    # stats; selecting a label twice would yield duplicate columns, so keep
    # only the first occurrence of each name.
    unique_columns = []
    for col in columns:
        if col not in unique_columns:
            unique_columns.append(col)
    return combined_pitchers[unique_columns]
Esempio n. 4
0
def create_pitcher_valuations(league, stats):
    """Value a frame of pitcher stats for the given league.

    The first z-score pass excludes pitchers whose ``era`` or ``whip`` is
    infinite from the reference sample; the second pass uses the pitchers
    whose ``zar`` came out positive in the first.

    Args:
        league: league settings object; ``league_name`` must be 'SoS' or
            'Legacy'. Its pitching stat lists pick the output columns.
        stats: DataFrame of pitcher stats. It is modified (no copy is taken).

    Returns:
        DataFrame with the identifying columns, ``ip``, the league's pitching
        stats, ``zar``, ``value``, ``zar_skills``, ``rank_sp`` and
        ``rank_rp``. Each column appears once.
    """
    from general import utilities
    from analysis import calculations

    assert league.league_name in ['SoS', 'Legacy']
    pitchers = stats
    pitchers['type'] = 'P'

    # An infinite rate stat would wreck the sample mean and deviation.
    infinity = float('inf')
    pitchers['sample'] = ~((pitchers['era'] == infinity) |
                           (pitchers['whip'] == infinity))

    for _ in range(2):
        pitchers = calculations.calc_z(df=pitchers, ls=league, type='pitching')
        pitchers['sample'] = pitchers['zar'] > 0

    columns = utilities.flatten([
        'name', 'fg_id', 'team', 'type', 'elig', 'ip',
        league.pitching_counting_stats, league.pitching_rate_stats, 'zar',
        'value', 'zar_skills', 'rank_sp', 'rank_rp'
    ])
    # 'ip' is listed explicitly and can also be one of the league's counting
    # stats; selecting a label twice would yield duplicate columns, so keep
    # only the first occurrence of each name.
    unique_columns = []
    for col in columns:
        if col not in unique_columns:
            unique_columns.append(col)
    return pitchers[unique_columns]
Esempio n. 5
0
def create_actuals_pitchers(ls, year=2021):
    """Load one season of pitching actuals from the tracking bbref tables.

    Joins the standard, starter and reliever pitching tables, converts the
    stat columns to numbers, adds ``svhld`` (saves + holds) and attaches
    ``fg_id`` / ``name`` from the player-name lookup.

    Args:
        ls: league settings object; ``ls.pitching_stats`` picks the stat
            columns that are returned.
        year: season to load. Coerced with ``int()`` before it is placed in
            the query text.

    Returns:
        DataFrame with ``fg_id``, ``name``, ``team``, ``ip`` and the league's
        pitching stats, exact duplicate rows removed.

    Raises:
        ValueError, TypeError: if ``year`` cannot be converted to an int.
    """
    import pandas as pd

    from general import utilities
    from general import postgres
    from munging import player_names

    # The year is concatenated into the SQL text below, so make sure it can
    # only ever be a plain integer.
    year = int(year)

    bbdb = postgres.connect_to_bbdb()
    query = (
        'SELECT pit_std.year, pit_std.bbref_id, pit_std."Tm" as team, pit_std."IP" as ip, pit_start."GS" as gs, pit_start."QS" as qs, pit_std."SO" as so, pit_std."ERA" as era, pit_std."WHIP" as whip, pit_relief."SV" as sv, pit_relief."Hold" as hld FROM '
        + '(SELECT * FROM tracking.bbref_pitching_standard) as pit_std ' +
        'LEFT JOIN (SELECT * FROM tracking.bbref_pitching_starter) as pit_start ON pit_std.bbref_id=pit_start.bbref_id AND pit_std.year=pit_start.year AND pit_std."Tm"=pit_start."Tm" '
        +
        'LEFT JOIN (SELECT * FROM tracking.bbref_pitching_reliever) as pit_relief ON pit_std.bbref_id=pit_relief.bbref_id AND pit_std.year=pit_relief.year AND pit_std."Tm"=pit_relief."Tm" '
        + 'WHERE pit_std.year=' + str(year))
    df = pd.read_sql_query(query, bbdb)

    # Rewrite ".1" / ".2" as ".33" / ".67" -- presumably the source stores
    # partial innings as thirds (TODO confirm).
    df['ip'] = df['ip'].str.replace('.1', '.33', regex=False)
    df['ip'] = df['ip'].str.replace('.2', '.67', regex=False)

    # Every column cast to int below needs its nulls filled first; 'so' is
    # included so a missing strikeout total cannot break the cast.
    df = df.fillna(value={
        'era': 0,
        'whip': 0,
        'gs': 0,
        'qs': 0,
        'so': 0,
        'sv': 0,
        'hld': 0
    })
    for c in ['gs', 'qs', 'so', 'sv', 'hld']:
        # Blank strings count as zero.
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['ip', 'era', 'whip']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    df['svhld'] = df['sv'] + df['hld']
    df = df[(df['bbref_id'].notnull()) & (df['bbref_id'] != u'')]

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_pitchers = df.merge(names[['bbref_id', 'fg_id', 'name']],
                                 on='bbref_id',
                                 how='left')

    # 'ip' can also be one of the league's pitching stats; keep the first
    # occurrence of each name so no column is selected twice.
    output_stats = []
    for stat in utilities.flatten([['fg_id', 'name', 'team', 'ip'],
                                   [ls.pitching_stats]]):
        if stat not in output_stats:
            output_stats.append(stat)

    # Assign the result instead of using inplace=True on a derived frame.
    combined_pitchers = combined_pitchers[output_stats].drop_duplicates()

    return combined_pitchers
Esempio n. 6
0
def create_actuals_hitters(ls, year=2021):
    """Load one season of batting actuals from the bbref batting table.

    Season 2021 is read from the ``tracking`` schema, any other season from
    ``reference``. Stat columns are converted to numbers and ``fg_id`` /
    ``name`` are attached from the player-name lookup.

    Args:
        ls: league settings object; ``ls.hitting_stats`` picks the stat
            columns that are returned.
        year: season to load. Coerced with ``int()`` so that a string such
            as '2021' selects the same schema as the integer 2021 and so
            that only a number reaches the query text.

    Returns:
        DataFrame with ``fg_id``, ``bbref_id``, ``name``, ``team``, ``pa``
        and the league's hitting stats, exact duplicate rows removed.

    Raises:
        ValueError, TypeError: if ``year`` cannot be converted to an int.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names

    year = int(year)

    bbdb = postgres.connect_to_bbdb()

    if year == 2021:
        tablename = 'tracking'
    else:
        tablename = 'reference'

    query = (
        'SELECT year, bbref_id, bat."Tm" as team, bat."PA" as pa, ' +
        'bat."HR" as hr, bat."R" as r, bat."RBI" as rbi, bat."SB" as sb, bat."OBP" as obp, bat."OPS" as ops '
        + 'FROM ' + tablename + '.bbref_batting_standard bat WHERE year=' +
        str(year))
    df = pd.read_sql_query(query, bbdb)

    # Every column cast to int below needs its nulls filled first; 'hr' is
    # included so a missing home-run total cannot break the cast.
    df = df.fillna(value={
        'obp': 0,
        'ops': 0,
        'pa': 0,
        'hr': 0,
        'r': 0,
        'rbi': 0,
        'sb': 0
    })
    for c in ['pa', 'r', 'rbi', 'hr', 'sb']:
        # Blank strings count as zero.
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['obp', 'ops']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    df = df[(df['bbref_id'].notnull()) & (df['bbref_id'] != u'')]

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = df.merge(names[['bbref_id', 'fg_id', 'name']],
                                on='bbref_id',
                                how='left')
    output_stats = utilities.flatten(
        [['fg_id', 'bbref_id', 'name', 'team', 'pa'], [ls.hitting_stats]])

    # Assign the result instead of using inplace=True on a derived frame.
    combined_hitters = combined_hitters[output_stats].drop_duplicates()
    return combined_hitters
Esempio n. 7
0
def create_combined_hitter_valuations(league):
    """Value the combined hitter projections for the given league.

    Produces two valuations per hitter: ``value`` from the projected plate
    appearances, and ``value_600`` from a second projection built with
    ``pa=600``. The second one reuses the sample flags left by the first.

    Args:
        league: league settings object; ``league_name`` must be 'SoS' or
            'Legacy'. Its hitting stat lists pick the output columns.

    Returns:
        DataFrame with the identifying columns, the league's hitting stats,
        ``zar``, ``value`` and ``value_600``.
    """
    from datetime import date
    from general import utilities
    from analysis import calculations
    from analysis import player_pool_stats

    assert league.league_name in ['SoS', 'Legacy']
    projected = player_pool_stats.create_combined_hitters(league)
    projected['type'] = 'B'

    # Share of the 2021-04-01 .. 2021-09-30 window that is still ahead of
    # today; it scales the 500 PA bar used for the first-pass sample.
    season_open = date(2021, 4, 1)
    season_close = date(2021, 9, 30)
    season_length = season_close - season_open
    time_left = season_close - date.today()
    share_left = time_left.days / season_length.days

    projected['sample'] = projected['pa'] > 500 * share_left
    for _pass in (1, 2):
        projected = calculations.calc_z(df=projected,
                                        ls=league,
                                        type='hitting')
        projected['sample'] = projected['zar'] > 0

    # Same hitters with plate appearances fixed at 600, valued against the
    # sample flags produced above.
    full_season = player_pool_stats.create_combined_hitters(league, pa=600)
    full_season['type'] = 'B'
    sample_flags = projected[['fg_id', 'sample']]
    full_season = full_season.merge(sample_flags, how='left', on='fg_id')
    full_season = calculations.calc_z(df=full_season,
                                      ls=league,
                                      type='hitting')

    value_600 = full_season[['fg_id', 'value']].rename(
        columns={'value': 'value_600'})
    projected = projected.merge(value_600, how='left', on='fg_id')

    wanted = utilities.flatten([
        'name', 'fg_id', 'type', 'elig', 'pa', league.hitting_counting_stats,
        league.hitting_rate_stats, 'zar', 'value', 'value_600'
    ])
    return projected[wanted]
Esempio n. 8
0
def create_last30_hitters(ls):
    """Load hitter stats from the ``tracking.batters_last30`` table.

    ``ops`` is computed in the query as ``obp + slg``. Stat columns are
    converted to numbers and ``name`` is attached from the player-name
    lookup.

    Args:
        ls: league settings object; ``ls.hitting_stats`` picks the stat
            columns that are returned.

    Returns:
        DataFrame with ``fg_id``, ``name``, ``team``, ``pa`` and the league's
        hitting stats, exact duplicate rows removed. Rows with a missing
        ``fg_id`` are not filtered out.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names

    bbdb = postgres.connect_to_bbdb()

    query = (
        'SELECT bat.fg_id, bat.team, bat.pa, ' +
        'bat.hr, bat.r, bat.rbi, bat.sb, bat.obp, bat.obp+bat.slg as ops ' +
        'FROM tracking.batters_last30 AS bat')
    df = pd.read_sql_query(query, bbdb)

    # Every column cast to int below needs its nulls filled first; 'hr' is
    # included so a missing home-run total cannot break the cast.
    df = df.fillna(value={
        'obp': 0,
        'ops': 0,
        'pa': 0,
        'hr': 0,
        'r': 0,
        'rbi': 0,
        'sb': 0
    })
    for c in ['pa', 'r', 'rbi', 'hr', 'sb']:
        # Blank strings count as zero.
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['obp', 'ops']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = df.merge(names[['fg_id', 'name']],
                                on='fg_id',
                                how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'pa'],
                                      [ls.hitting_stats]])

    # Assign the result instead of using inplace=True on a derived frame.
    combined_hitters = combined_hitters[output_stats].drop_duplicates()
    return combined_hitters
Esempio n. 9
0
def calc_z(df, ls, type):
    """Compute z-scores, z-above-replacement and auction values for a pool.

    For every league stat a weighted z-score column ``<stat>_z`` is added,
    measured against the rows flagged in ``sample``. Rate-stat z-scores are
    scaled by playing time (``pa`` or ``ip`` relative to the sample mean)
    and, for pitching, negated. The z-scores are summed into ``z``, the
    replacement level is subtracted to give ``zar``, and ``value`` spreads
    the league budget (``num_teams * 260 * split``) over the non-negative
    ``zar`` total.

    Hitting adds a catcher adjustment: catchers' ``zar`` is re-based on the
    replacement catcher. Pitching adds ``zar_skills``, an ``elig`` role of
    'sp' or 'rp', and ``rank_sp`` / ``rank_rp``.

    Args:
        df: player DataFrame holding the league's stat columns and the
            playing-time column. Columns are added to it in place. If it has
            no ``sample`` column, every row is used as the sample.
        ls: league settings object (stat lists, ``z_weights``, ``num_teams``,
            roster sizes, ``batting_split``, ``league_name``).
        type: 'hitting', 'batting' or 'pitching'.

    Returns:
        The DataFrame with the added columns, sorted by ``value`` descending.

    Raises:
        ValueError: if no row has a computable z-score.
    """
    assert type in ['hitting', 'batting', 'pitching']
    is_hitting = type in ('batting', 'hitting')
    if is_hitting:
        counting_stats = ls.hitting_counting_stats
        rate_stats = ls.hitting_rate_stats
        players_per_team = ls.hitters_per_team
        budget_split = ls.batting_split
        denom = 'pa'
    elif type == 'pitching':
        counting_stats = ls.pitching_counting_stats
        rate_stats = ls.pitching_rate_stats
        players_per_team = ls.pitchers_per_team
        budget_split = 1 - ls.batting_split
        denom = 'ip'
    else:
        # Only reachable when asserts are disabled (python -O).
        raise ValueError('unknown valuation type: %r' % (type,))

    stats = list(counting_stats) + list(rate_stats)

    # if 'sample' is not predefined then use entire data set
    if 'sample' not in df.columns:
        df['sample'] = True

    # Mean and standard deviation are taken over exactly the columns that
    # are needed. Averaging the whole frame would also hit the string
    # columns (names, teams), which recent pandas refuses to do.
    needed = []
    for col in stats + [denom]:
        if col not in needed:
            needed.append(col)
    # "== True" is deliberate: flags that arrive as NaN (e.g. from a left
    # merge) must count as "not in the sample".
    in_sample = df.loc[df['sample'] == True, needed].astype(float)
    mean = in_sample.mean()
    sd = in_sample.std()

    for var in stats:
        var_z = var + '_z'
        df[var_z] = (df[var] - mean[var]) / sd[var]
        if var in rate_stats:
            # A rate stat matters in proportion to playing time.
            df[var_z] = df[var_z] * df[denom] / mean[denom]
            if type == 'pitching':
                # Lower pitching rate stats are better.
                df[var_z] = -df[var_z]
        df[var_z] = ls.z_weights[var] * df[var_z]

    df['z'] = 0
    for var in stats:
        df['z'] = df['z'] + df[var + '_z']

    df['rank'] = df['z'].rank(method='average', ascending=False)

    # Replacement level is the z-score of the last rostered player. Picking
    # it by position in the sorted scores (instead of looking for a row whose
    # rank equals the roster count) still works when tied scores straddle the
    # cut-off or when the pool is smaller than the number of roster spots; in
    # the latter case the lowest-scoring player is the replacement level.
    roster_spots = int(round(ls.num_teams * players_per_team))
    ranked_z = df['z'].dropna().sort_values(ascending=False)
    if ranked_z.empty:
        raise ValueError('calc_z needs at least one player with a z-score')
    marginal_pos = min(max(roster_spots, 1), len(ranked_z)) - 1
    marginal_z = ranked_z.iloc[marginal_pos]
    df['zar'] = df['z'] - marginal_z

    # Catcher adjustment
    if is_hitting:
        if 'elig' not in df.columns:
            # Imported only when needed so pools that already carry
            # eligibility do not depend on the lookup.
            from analysis import elig
            # NOTE(review): eligibility is always looked up for 'SoS',
            # whatever league is being valued -- confirm.
            eligibilities = elig.get_eligibilities('SoS')
            df = df.merge(eligibilities[[
                'fg_id', 'elig'
            ]][eligibilities['fg_id'].isna() == False],
                          on='fg_id',
                          how='left')
        is_catcher = df['elig'].map(
            lambda e: 'C' in str(e).split(' ')).astype(bool)

        # The replacement catcher is the one at this 0-based position among
        # catchers ordered by zar, best first. The list has to be sorted for
        # the position to mean anything.
        # NOTE(review): 16 matches the SoS team count; confirm whether other
        # leagues should use their own num_teams.
        catcher_repl_index = 16
        catcher_zar = df.loc[is_catcher, 'zar'].dropna().sort_values(
            ascending=False)
        if not catcher_zar.empty:
            # With fewer catchers than that, fall back to the worst catcher.
            repl_pos = min(catcher_repl_index, len(catcher_zar) - 1)
            catcher_repl = catcher_zar.iloc[repl_pos]
            df.loc[is_catcher, 'zar'] = df.loc[is_catcher,
                                               'zar'] - catcher_repl

    if type == 'pitching':
        df['zar_skills'] = df['era_z'] + df['whip_z'] + df['so_z']
        if ls.league_name == 'SoS':
            df['elig'] = (df['qs'] > 0).map({True: 'sp', False: 'rp'})
        elif ls.league_name == 'Legacy':
            df['elig'] = ((df['svhld'] > 3) | (df['ip'] < 30)).map({
                True: 'rp',
                False: 'sp'
            })
        df['rank_sp'] = df[df['elig'] == 'sp']['zar'].rank(ascending=False)
        # Relievers are ranked within their own team.
        df['rank_rp'] = df[df['elig'] == 'rp'].groupby('team')['zar'].rank(
            ascending=False)

    # Spread this side's share of the budget over the positive zar total.
    sum_zar = df[df['zar'] >= 0]['zar'].sum()
    df['value'] = (ls.num_teams * 260 * budget_split) * df['zar'] / sum_zar

    df = df.sort_values(by='value', ascending=False)

    return df
Esempio n. 10
0
def update_inseason_valuations(league_sos, league_legacy):
    """Rebuild both leagues' in-season valuations and push them to Sheets.

    Hitters and pitchers are valued once per league, the two results are
    merged on ``fg_id`` with league-suffixed value columns, roster ownership
    is attached from the ``rosters`` tables, and the frames are written to
    the 'Proj - Hitters' and 'Proj - Pitchers' worksheets of the
    "BB 2021 InSeason" spreadsheet. Pitchers also get DRA / cFIP and xxxFIP.

    Args:
        league_sos: league settings object for the SoS league.
        league_legacy: league settings object for the Legacy league.

    Returns:
        None. The result is the updated worksheets.
    """
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf

    from general import gs
    from general import utilities
    from general import postgres

    # One connection serves every query in this function.
    bbdb = postgres.connect_to_bbdb()

    # Hitters
    sos_hitters = create_combined_hitter_valuations(league=league_sos) \
        .rename(columns={'zar': 'zar_sos', 'value': 'value_sos', 'value_600': 'value_600_sos'})
    legacy_hitters = create_combined_hitter_valuations(league=league_legacy) \
        .rename(columns={'zar': 'zar_legacy', 'value': 'value_legacy', 'value_600': 'value_600_legacy'})
    # Only the columns Legacy has and SoS lacks are brought across, plus the
    # join key.
    legacy_extra_columns = list(
        set(legacy_hitters.columns).difference(sos_hitters.columns))
    legacy_extra_columns = utilities.flatten(['fg_id', legacy_extra_columns])

    # The hitter sheet receives every merged column; no column subset is
    # applied here.
    combined_hitters = sos_hitters.merge(legacy_hitters[legacy_extra_columns],
                                         on='fg_id')
    combined_hitters.drop_duplicates(subset=['fg_id'], inplace=True)

    # Merge in the ownership
    sos_rosters = pd.read_sql(
        'SELECT fg_id, sos."Team" as sos_team FROM rosters.sos', con=bbdb)
    sos_rosters[['fg_id']] = sos_rosters[['fg_id']].astype(str)
    combined_hitters = combined_hitters.merge(sos_rosters,
                                              how='left',
                                              on='fg_id')

    legacy_rosters = pd.read_sql(
        'SELECT fg_id, legacy."Team" as legacy_team FROM rosters.legacy',
        con=bbdb)
    legacy_rosters[['fg_id']] = legacy_rosters[['fg_id']].astype(str)
    combined_hitters = combined_hitters.merge(legacy_rosters,
                                              how='left',
                                              on='fg_id')
    # Player 19755 is dropped from the hitter sheet for this one Legacy team.
    combined_hitters.drop(combined_hitters[
        (combined_hitters['fg_id'] == '19755')
        & (combined_hitters['legacy_team'] == 'Harper Wallbanger')].index,
                          inplace=True)

    # Pitchers
    sos_pitchers = create_combined_pitcher_valuations(league=league_sos) \
        .rename(columns={'zar': 'zar_sos', 'value': 'value_sos'})
    legacy_pitchers = create_combined_pitcher_valuations(league=league_legacy) \
        .rename(columns={'zar': 'zar_legacy', 'value': 'value_legacy'})
    legacy_extra_columns = list(
        set(legacy_pitchers.columns).difference(sos_pitchers.columns))
    legacy_extra_columns = utilities.flatten(['fg_id', legacy_extra_columns])

    # NOTE(review): stats shared by both leagues appear in this list more
    # than once, so the sheet gets repeated columns. Left as is because the
    # worksheet layout is external; confirm before de-duplicating.
    columns = [
        'name', 'fg_id', 'type', 'ip', league_sos.pitching_counting_stats,
        league_legacy.pitching_counting_stats, league_sos.pitching_rate_stats,
        league_legacy.pitching_rate_stats, 'zar_sos', 'value_sos',
        'zar_legacy', 'value_legacy'
    ]
    columns = utilities.flatten(columns)
    combined_pitchers = sos_pitchers.merge(
        legacy_pitchers[legacy_extra_columns], on='fg_id')
    combined_pitchers = combined_pitchers[columns]

    # Merge in CFIP
    cfip = pd.read_sql('SELECT * FROM hist.bp_pitchers_raw', con=bbdb)
    combined_pitchers = combined_pitchers.merge(cfip[['fg_id', 'DRA', 'cFIP']],
                                                how='left',
                                                on='fg_id')

    # Merge in xxxFIP
    xxxfip = pd.read_sql(
        'SELECT * FROM tracking.xxxfip WHERE fg_id IS NOT NULL', con=bbdb)
    combined_pitchers = combined_pitchers.merge(xxxfip[['fg_id', 'xxxFIP']],
                                                how='left',
                                                on='fg_id')

    # Merge in the ownership
    combined_pitchers = combined_pitchers.merge(sos_rosters,
                                                how='left',
                                                on='fg_id')
    combined_pitchers = combined_pitchers.merge(legacy_rosters,
                                                how='left',
                                                on='fg_id')
    # Player 19755 is dropped from the pitcher sheet for this one Legacy team.
    combined_pitchers.drop(combined_pitchers[
        (combined_pitchers['fg_id'] == '19755')
        & (combined_pitchers['legacy_team'] == 'Florun\'s Team')].index,
                           inplace=True)

    # Update Google Sheets
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    sh = gc.open("BB 2021 InSeason").worksheet('Proj - Hitters')
    gsdf.set_with_dataframe(sh, combined_hitters)
    gs.format_gsheet(sheet=sh)
    sh = gc.open("BB 2021 InSeason").worksheet('Proj - Pitchers')
    gsdf.set_with_dataframe(sh, combined_pitchers)
    gs.format_gsheet(sheet=sh)
Esempio n. 11
0
    def __init__(self, league_type):
        """Set up the scoring configuration for one league.

        Args:
            league_type: 'SoS' or 'Legacy'. Stored as ``league_name`` and
                ``name``.

        Raises:
            ValueError: if ``league_type`` is not one of the known leagues.

        Sets the platform and league number, team count, the counting and
        rate stat lists for hitting and pitching, the nominal and normalized
        z-score weights (merged into ``z_weights``), the hitter share of the
        budget, roster sizes, and the combined ``hitting_stats`` /
        ``pitching_stats`` lists (rate stats first).
        """
        import sys
        # Add the path once; appending on every instantiation would keep
        # growing sys.path with duplicates.
        if 'python/general' not in sys.path:
            sys.path.append('python/general')
        from general import utilities

        self.league_name = league_type
        self.name = self.league_name
        self.year = 2021

        if (league_type == 'SoS'):
            self.league_platform = 'fleaflicker'
            self.league_num = '23172'

            self.num_teams = 16
            self.hitting_counting_stats = ['hr', 'r', 'rbi', 'sb']
            self.hitting_rate_stats = ['obp', 'ops']
            self.hitting_other_stats = ['ab']
            self.z_weights_nominal_hitting = {
                # alternative weighting kept for reference:
                #'hr':1, 'r':1, 'rbi':1, 'sb':1.1, 'obp':1.3, 'ops':1.2
                'hr': 1,
                'r': 1,
                'rbi': 1,
                'sb': 1,
                'obp': 1.1,
                'ops': 1.1
            }
            self.z_weights_hitting = self.normalize_z_weights(
                self.z_weights_nominal_hitting)

            self.pitching_counting_stats = ['qs', 'so', 'sv', 'hld']
            self.pitching_rate_stats = ['era', 'whip']
            self.pitching_other_stats = ['gs', 'g']
            self.z_weights_nominal_pitching = {
                # alternative weighting kept for reference:
                #'qs':1.2, 'so':1, 'sv':.9, 'hld':.6, 'era':1.2, 'whip':1.2
                'qs': 1,
                'so': 1.1,
                'sv': 1,
                'hld': 1,
                'era': 1,
                'whip': 1
            }
            self.z_weights_pitching = self.normalize_z_weights(
                self.z_weights_nominal_pitching)

            # Hitting and pitching stat names do not overlap, so one lookup
            # table can serve both.
            self.z_weights = {
                **self.z_weights_hitting,
                **self.z_weights_pitching
            }
            self.batting_split = .6
            self.hitters_per_team = 12.5
            self.pitchers_per_team = 12.5

        elif (league_type == 'Legacy'):
            self.league_platform = 'yahoo'
            self.league_num = '26574'

            self.num_teams = 12
            self.hitting_counting_stats = ['hr', 'r', 'rbi', 'sb']
            self.hitting_rate_stats = ['obp']
            self.z_weights_nominal_hitting = {
                'hr': 1,
                'r': 1,
                'rbi': 1,
                'sb': .2,
                'obp': 1
            }
            self.z_weights_hitting = self.normalize_z_weights(
                self.z_weights_nominal_hitting)

            self.pitching_counting_stats = ['ip', 'so', 'svhld']
            self.pitching_rate_stats = ['era', 'whip']
            self.z_weights_nominal_pitching = {
                'ip': 1,
                'so': 1,
                'svhld': .8,
                'era': 1,
                'whip': 1
            }
            self.z_weights_pitching = self.normalize_z_weights(
                self.z_weights_nominal_pitching)

            self.z_weights = {
                **self.z_weights_hitting,
                **self.z_weights_pitching
            }
            self.batting_split = .6
            self.hitters_per_team = 12.5
            self.pitchers_per_team = 12.5

        else:
            # Without this the stat lists below would be missing and the
            # failure would surface as an unrelated AttributeError.
            raise ValueError(
                "unknown league_type %r; expected 'SoS' or 'Legacy'" %
                (league_type,))

        self.hitting_stats = utilities.flatten(
            [self.hitting_rate_stats, self.hitting_counting_stats])
        self.pitching_stats = utilities.flatten(
            [self.pitching_rate_stats, self.pitching_counting_stats])
Esempio n. 12
0
def create_combined_pitchers(ls):
    """Blend the pitcher projection systems into one projection per player.

    Reads the 'razz' and 'fg_dc' projection tables, derives ``svhld``
    (saves + holds) for each, and takes a per-player weighted average of
    ``ip`` and the league's pitching stats using the per-system weights.
    Names and teams are attached afterwards.

    Args:
        ls: league settings object; ``pitching_counting_stats`` and
            ``pitching_stats`` decide which stats are filled and averaged.

    Returns:
        DataFrame with ``fg_id``, ``name``, ``team``, ``ip`` and the league's
        pitching stats, each column once.
    """
    import pandas as pd
    from munging import player_names
    from general import postgres, utilities

    bbdb = postgres.connect_to_bbdb()

    query = (
        'SELECT \'razz\' as source, fg_id, ip, qs, era, whip, k as so, sv, hld '
        + 'FROM proj.razz_pitchers')
    df_razz = pd.read_sql_query(query, bbdb)
    df_razz['svhld'] = (df_razz['sv'] + df_razz['hld'])

    query = (
        'SELECT \'fg_dc\' as source, fg_id, ip, qs, era, whip, so, sv, hld ' +
        'FROM proj.fg_dc_pitchers ')
    df_fg_dc = pd.read_sql_query(query, bbdb)
    df_fg_dc['qs'] = df_fg_dc['qs'].replace({0: None})
    # Saves + holds must come from this system's own rows. Taking them from
    # the razz frame paired unrelated players by row position.
    df_fg_dc['svhld'] = (df_fg_dc['sv'] + df_fg_dc['hld'])
    df = pd.concat([df_razz, df_fg_dc])

    df_ip = df[['source', 'fg_id', 'ip']]

    query_teams = 'SELECT playerid as fg_id, fg_dc_pitchers_raw."Team" as team FROM proj.fg_dc_pitchers_raw'
    df_teams = pd.read_sql_query(query_teams, bbdb)

    # A missing counting stat counts as zero.
    for var in ls.pitching_counting_stats:
        df[var] = df.apply(lambda row: 0 if pd.isna(row[var]) else row[var],
                           axis=1)

    weights = {
        'system': ['fg_dc', 'thebat', 'thebatx', 'pod', 'razz'],
        'sys_weight': [1, 1, 1.2, 0, .01]
    }
    weights = pd.DataFrame(weights)
    df = df.merge(right=weights,
                  how='left',
                  left_on='source',
                  right_on='system')

    # NOTE(review): these innings-specific weights are merged into df_ip,
    # but df_ip only supplies the list of player ids below; 'ip' itself is
    # averaged from df with the general weights. Confirm which was intended.
    weights_ip = {
        'system': ['fg_dc', 'thebat', 'thebatx', 'pod', 'razz'],
        'sys_weight': [.25, 0, 0, 0, .01]
    }
    weights_ip = pd.DataFrame(weights_ip)
    df_ip = df_ip.merge(right=weights_ip,
                        how='left',
                        left_on='source',
                        right_on='system')

    def weighted_average(df, data_col, weight_col, by_col):
        # Weighted mean of data_col per by_col. Rows whose value is null add
        # nothing to the numerator and their weight is left out of the
        # denominator.
        df['_data_times_weight'] = df[data_col] * df[weight_col]
        df['_weight_where_notnull'] = df[weight_col] * pd.notnull(df[data_col])
        g = df.groupby(by_col)
        result = g['_data_times_weight'].sum(
        ) / g['_weight_where_notnull'].sum()
        del df['_data_times_weight'], df['_weight_where_notnull']
        result = pd.DataFrame(result, columns=[data_col])
        return result

    # fg_dc quality starts are excluded from the average entirely.
    df.loc[df['source'] == 'fg_dc', 'qs'] = None

    combined_pitchers = pd.DataFrame(df_ip['fg_id'].unique(),
                                     columns=['fg_id'])
    statlist = list(set(utilities.flatten([['ip'], ls.pitching_stats])))
    for stat in statlist:  # do this list(set(*)) to get unique values b/c ip may be in there twice
        t = weighted_average(df, stat, 'sys_weight', 'fg_id')
        combined_pitchers = combined_pitchers.merge(t, on='fg_id')

    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_pitchers = combined_pitchers.merge(names[['fg_id', 'name']],
                                                on='fg_id',
                                                how='left')
    combined_pitchers = combined_pitchers.merge(df_teams,
                                                on='fg_id',
                                                how='left')
    output_stats = ['fg_id', 'name', 'team', 'ip']
    for stat in ls.pitching_stats:
        if (stat in output_stats) is False:
            output_stats.append(stat)
    combined_pitchers = combined_pitchers[output_stats]

    return combined_pitchers
Esempio n. 13
0
def create_combined_hitters(ls, pa=0):
    """Blend the hitter projection systems into one projection per player.

    Per-PA counting rates and rate stats are averaged across the four
    projection tables with per-system weights; plate appearances come from
    the fg_dc table only. Counting stats are then rebuilt as rate times PA.

    Args:
        ls: league settings object; its hitting stat lists decide which
            stats are averaged and returned.
        pa: when greater than zero, every player's plate appearances are
            replaced by this number before counting stats are rebuilt.

    Returns:
        DataFrame with ``fg_id``, ``name``, ``team``, ``pa`` and the league's
        hitting stats.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names

    def _weighted_mean(frame, value_col, weight_col, key_col):
        # Weighted mean of value_col per key_col. Rows with a null value add
        # nothing to the numerator and their weight is kept out of the
        # denominator. The two scratch columns are removed again.
        frame['_data_times_weight'] = frame[value_col] * frame[weight_col]
        frame['_weight_where_notnull'] = (frame[weight_col] *
                                          pd.notnull(frame[value_col]))
        grouped = frame.groupby(key_col)
        numerator = grouped['_data_times_weight'].sum()
        denominator = grouped['_weight_where_notnull'].sum()
        ratio = numerator / denominator
        del frame['_data_times_weight'], frame['_weight_where_notnull']
        return pd.DataFrame(ratio, columns=[value_col])

    # One SELECT per projection system, glued together with UNION.
    select_template = (
        "SELECT '{0}' as source, fg_id, pa, hr_pa, r_pa, rbi_pa, sb_pa, obp, ops "
        "FROM proj.{0}_batters ")
    systems = ['fg_dc', 'thebat', 'thebatx', 'pod']
    query = ('SELECT proj.* FROM (' +
             'UNION '.join(select_template.format(s) for s in systems) +
             ') AS proj')
    query_pa = ("SELECT proj.* FROM (SELECT 'fg_dc' as source, fg_id, pa "
                "FROM proj.fg_dc_batters ) AS proj")
    query_teams = 'SELECT playerid as fg_id, fg_dc_batters_raw."Team" as team FROM proj.fg_dc_batters_raw'

    bbdb = postgres.connect_to_bbdb()
    df = pd.read_sql_query(query, bbdb)

    # NOTE(review): this id is remapped in the PA and team frames only, not
    # in the stats frame above -- confirm that is intended.
    df_pa = pd.read_sql_query(query_pa, bbdb)
    df_pa.loc[df_pa['fg_id'] == 'sa3011918', 'fg_id'] = '27506'

    df_teams = pd.read_sql_query(query_teams, bbdb)
    df_teams.loc[df_teams['fg_id'] == 'sa3011918', 'fg_id'] = '27506'

    # Attach the per-system weights.
    stat_weights = pd.DataFrame({
        'system': ['fg_dc', 'thebat', 'thebatx', 'pod'],
        'sys_weight': [1, 1, 1.2, .6]
    })
    df = df.merge(right=stat_weights,
                  how='left',
                  left_on='source',
                  right_on='system')

    pa_weights = pd.DataFrame({
        'system': ['fg_dc', 'pod'],
        'sys_weight': [1, 0]
    })
    df_pa = df_pa.merge(right=pa_weights,
                        how='left',
                        left_on='source',
                        right_on='system')

    # Plate appearances first, optionally overridden by the caller.
    blended = pd.DataFrame(df_pa['fg_id'].unique(), columns=['fg_id'])
    pa_average = _weighted_mean(df_pa, 'pa', 'sys_weight', 'fg_id')
    blended = blended.merge(pa_average, on='fg_id')
    if pa > 0:
        blended['pa'] = pa

    # Then the per-PA counting rates and the rate stats.
    per_pa_names = [stat + '_pa' for stat in ls.hitting_counting_stats]
    for stat in utilities.flatten([per_pa_names, ls.hitting_rate_stats]):
        averaged = _weighted_mean(df, stat, 'sys_weight', 'fg_id')
        blended = blended.merge(averaged, on='fg_id')

    # Turn each per-PA rate back into a counting total.
    for stat in ls.hitting_counting_stats:
        rate_col = stat + '_pa'
        blended[stat] = blended[rate_col] * blended['pa']
        blended = blended.drop(columns=[rate_col])

    # merge in the names and reorder
    names = player_names.get_player_names()
    blended = blended.merge(names[['fg_id', 'name']], on='fg_id', how='left')
    blended = blended.merge(df_teams, on='fg_id', how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'pa'],
                                      [ls.hitting_stats]])
    return blended[output_stats]