def create_hitter_valuations(league, stats):
    """Compute z-score auction valuations for a single-source hitter stat set.

    Args:
        league: league-settings object; must be the 'SoS' or 'Legacy' league.
        stats: DataFrame of hitter stat lines (needs 'pa' plus the league's
            hitting stats). NOTE: mutated in place ('type'/'sample' columns
            are added) — pass a copy if the caller needs the original intact.

    Returns:
        DataFrame restricted to the display columns, with the index reset.
    """
    from general import utilities
    from analysis import calculations

    assert league.league_name in ['SoS', 'Legacy']

    hitters = stats
    hitters['type'] = 'B'

    # Seed the z-score sample with the top decile by plate appearances,
    # then iterate twice so the sample converges on above-replacement
    # (zar > 0) players.  (Vectorized comparisons replace row-wise apply.)
    pa_threshold = hitters['pa'].quantile(.9)
    hitters['sample'] = hitters['pa'] > pa_threshold
    for _ in range(2):
        hitters = calculations.calc_z(df=hitters, ls=league, type='batting')
        hitters['sample'] = hitters['zar'] > 0

    columns = utilities.flatten([
        'name', 'fg_id', 'team', 'type', 'elig', 'pa',
        league.hitting_counting_stats, league.hitting_rate_stats,
        'zar', 'value'
    ])
    hitters = hitters[columns]
    hitters.reset_index(inplace=True)
    return hitters
def value_pool_hitting(league, player_pool, type):
    """Value a hitting player pool with iterative z-scores.

    Args:
        league: league-settings object.
        player_pool: DataFrame of player stat lines; mutated in place
            ('type'/'sample' columns are added).
        type: 'B' or 'P' tag written to the 'type' column.  (Kept as-is for
            interface compatibility even though it shadows the builtin.)

    Returns:
        DataFrame restricted to the display columns.
    """
    from general import utilities
    from analysis import calculations

    assert type in ['B', 'P']
    player_pool['type'] = type

    # Everyone starts in the z-score sample; each pass restricts it to
    # above-replacement (zar > 0) players and recalculates.
    # (A PA-based sample threshold scaled by season progress was tried here
    # and abandoned: player_pool['sample'] = player_pool['pa'] > 500*pct.)
    player_pool['sample'] = True
    for _ in range(2):
        player_pool = calculations.calc_z(df=player_pool, ls=league,
                                          type='hitting')
        player_pool['sample'] = player_pool['zar'] > 0

    columns = utilities.flatten([
        'name', 'fg_id', 'type', 'elig', 'pa',
        league.hitting_counting_stats, league.hitting_rate_stats,
        'zar', 'value'
    ])
    player_pool = player_pool[columns]
    return player_pool
def create_combined_pitcher_valuations(league):
    """Build auction valuations for the blended multi-system pitcher pool."""
    from general import utilities
    from analysis import calculations
    from analysis import player_pool_stats

    assert league.league_name in ['SoS', 'Legacy']

    pool = player_pool_stats.create_combined_pitchers(league)
    pool['type'] = 'P'

    # Start with every pitcher in the sample; each pass narrows it to
    # above-replacement players and re-derives the z-scores.
    pool['sample'] = True
    for _ in (1, 2):
        pool = calculations.calc_z(df=pool, ls=league, type='pitching')
        pool['sample'] = pool.apply(lambda row: row.zar > 0, axis=1)

    keep = utilities.flatten([
        'name', 'fg_id', 'team', 'type', 'elig', 'ip',
        league.pitching_counting_stats, league.pitching_rate_stats,
        'zar', 'value', 'zar_skills', 'rank_sp', 'rank_rp'
    ])
    return pool[keep]
def create_pitcher_valuations(league, stats):
    """Compute z-score auction valuations for a single-source pitcher stat set.

    Args:
        league: league-settings object; must be the 'SoS' or 'Legacy' league.
        stats: DataFrame of pitcher stat lines (needs 'era'/'whip' plus the
            league's pitching stats). NOTE: mutated in place.

    Returns:
        DataFrame restricted to the display columns.
    """
    from general import utilities
    from analysis import calculations

    assert league.league_name in ['SoS', 'Legacy']

    pitchers = stats
    pitchers['type'] = 'P'

    # Seed the sample excluding rate-stat outliers: an ERA/WHIP of inf means
    # runs/baserunners allowed without recording an out.
    # (Vectorized mask replaces the row-wise apply.)
    inf = float('inf')
    pitchers['sample'] = ~((pitchers['era'] == inf) |
                           (pitchers['whip'] == inf))
    for _ in range(2):
        pitchers = calculations.calc_z(df=pitchers, ls=league,
                                       type='pitching')
        pitchers['sample'] = pitchers['zar'] > 0

    columns = utilities.flatten([
        'name', 'fg_id', 'team', 'type', 'elig', 'ip',
        league.pitching_counting_stats, league.pitching_rate_stats,
        'zar', 'value', 'zar_skills', 'rank_sp', 'rank_rp'
    ])
    pitchers = pitchers[columns]
    return pitchers
def create_actuals_pitchers(ls, year=2021):
    """Load realized pitcher stat lines for `year` from Baseball-Reference tables.

    Joins the standard/starter/reliever pitching tables (tracking schema),
    cleans the numeric columns, derives svhld, and merges in player names.

    Args:
        ls: league-settings object; ls.pitching_stats selects output columns.
        year: season to load.

    Returns:
        DataFrame with fg_id, name, team, ip plus ls.pitching_stats.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names
    bbdb = postgres.connect_to_bbdb()
    # Three bbref tables joined on (bbref_id, year, team): standard is the
    # spine; starter contributes GS/QS, reliever contributes SV/Hold.
    query = (
        'SELECT pit_std.year, pit_std.bbref_id, pit_std."Tm" as team, pit_std."IP" as ip, pit_start."GS" as gs, pit_start."QS" as qs, pit_std."SO" as so, pit_std."ERA" as era, pit_std."WHIP" as whip, pit_relief."SV" as sv, pit_relief."Hold" as hld FROM '
        + '(SELECT * FROM tracking.bbref_pitching_standard) as pit_std '
        + 'LEFT JOIN (SELECT * FROM tracking.bbref_pitching_starter) as pit_start ON pit_std.bbref_id=pit_start.bbref_id AND pit_std.year=pit_start.year AND pit_std."Tm"=pit_start."Tm" '
        + 'LEFT JOIN (SELECT * FROM tracking.bbref_pitching_reliever) as pit_relief ON pit_std.bbref_id=pit_relief.bbref_id AND pit_std.year=pit_relief.year AND pit_std."Tm"=pit_relief."Tm" '
        + 'WHERE pit_std.year=' + str(year))
    df = pd.read_sql_query(query, bbdb)
    # bbref writes partial innings as .1/.2 outs; convert to decimal thirds.
    # NOTE(review): plain substring replace — assumes bbref IP strings only
    # ever contain '.1'/'.2' as the fractional part; confirm for new seasons.
    df['ip'] = df['ip'].str.replace('.1', '.33', regex=False)
    df['ip'] = df['ip'].str.replace('.2', '.67', regex=False)
    # Missing joins (no starter/reliever row) become zeros, then blank
    # strings are also zeroed before the numeric casts.
    df = df.fillna(value={
        'era': 0,
        'whip': 0,
        'gs': 0,
        'qs': 0,
        'sv': 0,
        'hld': 0
    })
    for c in ['gs', 'qs', 'so', 'sv', 'hld']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['ip', 'era', 'whip']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    # Combined saves+holds category used by the Legacy league.
    df['svhld'] = df['sv'] + df['hld']
    # Drop rows with no bbref_id (league-total / header artifacts).
    df = df[(df['bbref_id'].notnull()) & (df['bbref_id'] != u'')]
    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_pitchers = df.merge(names[['bbref_id', 'fg_id', 'name']],
                                 on='bbref_id',
                                 how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'ip'],
                                      [ls.pitching_stats]])
    combined_pitchers = combined_pitchers[output_stats]
    combined_pitchers.drop_duplicates(inplace=True)
    return combined_pitchers
def create_actuals_hitters(ls, year=2021):
    """Load realized hitter stat lines for `year` from Baseball-Reference.

    The current season (2021) lives in the `tracking` schema; historical
    seasons live in `reference`.  Cleans numeric columns and merges names.

    Args:
        ls: league-settings object; ls.hitting_stats selects output columns.
        year: season to load.

    Returns:
        DataFrame with fg_id, bbref_id, name, team, pa plus ls.hitting_stats.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names
    # (Removed unused `from general import classes` import.)
    bbdb = postgres.connect_to_bbdb()
    if year == 2021:
        tablename = 'tracking'
    else:
        tablename = 'reference'
    query = (
        'SELECT year, bbref_id, bat."Tm" as team, bat."PA" as pa, ' +
        'bat."HR" as hr, bat."R" as r, bat."RBI" as rbi, bat."SB" as sb, bat."OBP" as obp, bat."OPS" as ops '
        + 'FROM ' + tablename + '.bbref_batting_standard bat WHERE year=' +
        str(year))
    df = pd.read_sql_query(query, bbdb)
    # NaNs then blank strings are zeroed before the numeric casts.
    df = df.fillna(value={
        'obp': 0,
        'ops': 0,
        'pa': 0,
        'r': 0,
        'rbi': 0,
        'sb': 0
    })
    for c in ['pa', 'r', 'rbi', 'hr', 'sb']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['obp', 'ops']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    # Drop rows with no bbref_id (league-total / header artifacts).
    df = df[(df['bbref_id'].notnull()) & (df['bbref_id'] != u'')]
    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = df.merge(names[['bbref_id', 'fg_id', 'name']],
                                on='bbref_id',
                                how='left')
    output_stats = utilities.flatten(
        [['fg_id', 'bbref_id', 'name', 'team', 'pa'], [ls.hitting_stats]])
    combined_hitters = combined_hitters[output_stats]
    combined_hitters.drop_duplicates(inplace=True)
    return combined_hitters
def create_combined_hitter_valuations(league):
    """Value the blended multi-system hitter pool, plus a 600-PA what-if value.

    Runs the iterative z-score valuation on projected playing time, then
    re-runs it with everyone normalized to 600 PA (same sample flags) and
    attaches that as 'value_600'.

    Args:
        league: league-settings object; must be the 'SoS' or 'Legacy' league.

    Returns:
        DataFrame restricted to the display columns, including 'value_600'.
    """
    from datetime import date
    from general import utilities
    from analysis import calculations
    from analysis import player_pool_stats

    assert league.league_name in ['SoS', 'Legacy']

    combined_hitters = player_pool_stats.create_combined_hitters(league)
    combined_hitters['type'] = 'B'

    # Scale the 500-PA sample threshold by the fraction of the 2021 season
    # remaining.  NOTE(review): despite the name, pct_through is
    # (days remaining) / (season length), i.e. the fraction *left*.
    f_date = date(2021, 4, 1)
    l_date = date(2021, 9, 30)
    pct_through = (l_date - date.today()).days / (l_date - f_date).days
    combined_hitters['sample'] = combined_hitters['pa'] > 500 * pct_through

    # Iterate so the sample converges on above-replacement players.
    for _ in range(2):
        combined_hitters = calculations.calc_z(df=combined_hitters,
                                               ls=league,
                                               type='hitting')
        combined_hitters['sample'] = combined_hitters['zar'] > 0

    # Re-value the same pool with everyone forced to 600 PA, reusing the
    # converged sample flags, to get a playing-time-neutral value.
    combined_hitters_600 = player_pool_stats.create_combined_hitters(league,
                                                                     pa=600)
    combined_hitters_600['type'] = 'B'
    combined_hitters_600 = combined_hitters_600.merge(
        combined_hitters[['fg_id', 'sample']], how='left', on='fg_id')
    combined_hitters_600 = calculations.calc_z(df=combined_hitters_600,
                                               ls=league,
                                               type='hitting')
    combined_hitters = combined_hitters.merge(
        combined_hitters_600[['fg_id',
                              'value']].rename(columns={'value': 'value_600'}),
        how='left',
        on='fg_id')

    columns = utilities.flatten([
        'name', 'fg_id', 'type', 'elig', 'pa',
        league.hitting_counting_stats, league.hitting_rate_stats,
        'zar', 'value', 'value_600'
    ])
    combined_hitters = combined_hitters[columns]
    return combined_hitters
def create_last30_hitters(ls):
    """Load last-30-day hitter stat lines from the tracking schema.

    OPS is synthesized in SQL as obp + slg.  Cleans numeric columns and
    merges in player names.

    Args:
        ls: league-settings object; ls.hitting_stats selects output columns.

    Returns:
        DataFrame with fg_id, name, team, pa plus ls.hitting_stats.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names
    # (Removed unused `from general import classes` import.)
    bbdb = postgres.connect_to_bbdb()
    query = (
        'SELECT bat.fg_id, bat.team, bat.pa, ' +
        'bat.hr, bat.r, bat.rbi, bat.sb, bat.obp, bat.obp+bat.slg as ops ' +
        'FROM tracking.batters_last30 AS bat')
    df = pd.read_sql_query(query, bbdb)
    # NaNs then blank strings are zeroed before the numeric casts.
    df = df.fillna(value={
        'obp': 0,
        'ops': 0,
        'pa': 0,
        'r': 0,
        'rbi': 0,
        'sb': 0
    })
    for c in ['pa', 'r', 'rbi', 'hr', 'sb']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(int)
    for c in ['obp', 'ops']:
        df[c] = df[c].replace(r'^\s*$', 0, regex=True)
        df[c] = df[c].astype(float)
    #df = df[(df['fg_id'].notnull()) & (df['fg_id']!=u'')]
    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = df.merge(names[['fg_id', 'name']],
                                on='fg_id',
                                how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'pa'],
                                      [ls.hitting_stats]])
    combined_hitters = combined_hitters[output_stats]
    combined_hitters.drop_duplicates(inplace=True)
    return combined_hitters
def calc_z(df, ls, type):
    """Core valuation engine: weighted z-scores, replacement level, dollars.

    For each league stat, computes a z-score over the rows flagged in the
    'sample' column, converts the summed z into zar (z above replacement at
    the league's roster cutoff), applies a catcher positional adjustment for
    hitting, and scales positive zar into auction dollars.

    Args:
        df: stat DataFrame; may carry a boolean 'sample' column.
        ls: league-settings object (stat lists, weights, num_teams, splits).
        type: 'hitting'/'batting' (synonyms) or 'pitching'.

    Returns:
        df with *_z, z, rank, zar, value (and pitching extras) columns,
        sorted by value descending.
    """
    import json
    import sys
    from general import utilities
    from analysis import elig
    assert type in ['hitting', 'batting', 'pitching']
    # Select the stat lists, roster sizes, budget share, and the playing-time
    # denominator ('pa' or 'ip') for the requested side of the game.
    if (type == 'batting' or type == 'hitting'):
        counting_stats = ls.hitting_counting_stats
        rate_stats = ls.hitting_rate_stats
        players_per_team = ls.hitters_per_team
        budget_split = ls.batting_split
        denom = 'pa'
    if (type == 'pitching'):
        counting_stats = ls.pitching_counting_stats
        rate_stats = ls.pitching_rate_stats
        players_per_team = ls.pitchers_per_team
        budget_split = 1 - ls.batting_split
        denom = 'ip'
    stats = utilities.flatten([counting_stats, rate_stats])
    # if 'sample' is not predefined then use entire data set
    if (('sample' in df.columns) == False):
        df['sample'] = True
    # Mean/SD come only from the sample rows, but z-scores are assigned to
    # every row.  NOTE(review): frame-wide .mean()/.std() on mixed-dtype
    # frames is deprecated in newer pandas — may need numeric_only=True.
    mean = df[df['sample'] == True].mean()
    sd = df[df['sample'] == True].std()
    for var in stats:
        var_z = var + '_z'
        df[var_z] = df.apply(lambda row: (row[var] - mean[var]) / sd[var],
                             axis=1)
        if (var in rate_stats):
            # Rate stats are weighted by playing time relative to the sample
            # mean, so low-PA/IP extremes don't dominate.
            df[var_z] = df.apply(
                lambda row: row[var_z] * row[denom] / mean[denom], axis=1)
            if (type == 'pitching'):
                # Lower ERA/WHIP is better, so flip the sign.
                df[var_z] = -df[var_z]
        df[var_z] = ls.z_weights[var] * df[var_z]
    df['z'] = 0
    for var in stats:
        df['z'] = df.apply(lambda row: (row['z'] + row[var + '_z']), axis=1)
    df['rank'] = df['z'].rank(method='average', ascending=False)
    # Replacement level = z of the last rosterable player.
    # NOTE(review): requires a row whose rank is exactly num_teams *
    # players_per_team (works with the x.5 per-team settings and average
    # ranking) — .to_list()[0] raises IndexError otherwise.
    marginal_z = df[df['rank'] == ls.num_teams *
                    players_per_team]['z'].to_list()[0]
    df['zar'] = df.apply(lambda row: (row['z'] - marginal_z), axis=1)
    # Catcher adjustment
    if (type == 'batting' or type == 'hitting'):
        if not ('elig' in df.columns):
            eligibilities = elig.get_eligibilities('SoS')
            df = df.merge(eligibilities[[
                'fg_id', 'elig'
            ]][eligibilities['fg_id'].isna() == False],
                          on='fg_id',
                          how='left')
        # 'elig' is a space-separated position string; catchers get their
        # zar re-based to the catcher replacement level.
        df['catcher'] = df.apply(
            lambda row: 'C' in str(row['elig']).split(' '), axis=1)
        catchers = df[df['catcher']].copy()
        catchers['rank'] = catchers['zar'].rank(ascending=False)
        # NOTE(review): iloc[16] is positional (17th catcher in current row
        # order), not the rank-17 catcher — the 'rank' column computed above
        # is never used; looks like a bug to confirm.
        catcher_repl = catchers.iloc[16]['zar']

        def add_catcher_repl(row):
            if row['catcher']:
                return row['zar'] - catcher_repl
            else:
                return row['zar']

        df['zar'] = df.apply(lambda row: add_catcher_repl(row), axis=1)
        del (df['catcher'])
    if (type == 'pitching'):
        # Skills-only score ignoring role/opportunity stats.
        df['zar_skills'] = df['era_z'] + df['whip_z'] + df['so_z']
        # Role inference differs by league: SoS uses QS, Legacy uses
        # saves+holds and an innings floor.
        if (ls.league_name == 'SoS'):
            df['elig'] = df.apply(lambda row: 'sp' if (row['qs'] > 0) else 'rp',
                                  axis=1)
        elif (ls.league_name == 'Legacy'):
            df['elig'] = df.apply(
                lambda row: 'rp'
                if (row['svhld'] > 3 or row['ip'] < 30) else 'sp',
                axis=1)
        df['rank_sp'] = df[df['elig'] == 'sp']['zar'].rank(ascending=False)
        # RP rank is within MLB team (closer-chart ordering).
        df['rank_rp'] = df[df['elig'] == 'rp'].groupby('team')['zar'].rank(
            ascending=False)
    # Distribute this side's share of the total auction budget ($260/team)
    # proportionally over positive zar.
    sum_zar = df[df['zar'] >= 0]['zar'].sum()
    df['value'] = df.apply(lambda row: (
        (ls.num_teams * 260 * budget_split) * row['zar'] / sum_zar),
                           axis=1)
    df = df.sort_values(by='value', ascending=False)
    return df
def update_inseason_valuations(league_sos, league_legacy):
    """Refresh the in-season Google Sheet with SoS + Legacy valuations.

    Builds hitter and pitcher valuation frames for both leagues, merges the
    Legacy-only columns onto the SoS frames, attaches roster ownership and
    pitcher quality metrics from Postgres, and writes both frames to the
    'BB 2021 InSeason' spreadsheet.  Side effects only; returns None.
    """
    import pandas as pd
    import gspread
    import gspread_dataframe as gsdf
    from general import gs
    from general import utilities
    from general import postgres
    # --- Hitters: value under each league's settings, then join the columns
    # Legacy adds that SoS doesn't already have.
    sos_hitters = create_combined_hitter_valuations(league=league_sos) \
        .rename(columns={'zar': 'zar_sos', 'value': 'value_sos', 'value_600': 'value_600_sos'})
    legacy_hitters = create_combined_hitter_valuations(league=league_legacy) \
        .rename(columns={'zar': 'zar_legacy', 'value': 'value_legacy', 'value_600': 'value_600_legacy'})
    legacy_extra_columns = list(
        set(legacy_hitters.columns).difference(sos_hitters.columns))
    legacy_extra_columns = utilities.flatten(['fg_id', legacy_extra_columns])
    # NOTE(review): this hitters column list repeats the same attributes
    # (probably meant sos+legacy counting and rate stats) and is never
    # applied to combined_hitters below — dead code to confirm/remove.
    columns = [
        'name', 'fg_id', 'type', 'elig', 'pa',
        league_sos.hitting_counting_stats, league_sos.hitting_counting_stats,
        league_legacy.hitting_rate_stats, league_legacy.hitting_rate_stats,
        'value_sos', 'value_600_sos', 'value_legacy', 'value_600_legacy'
    ]
    columns = utilities.flatten(columns)
    combined_hitters = sos_hitters.merge(legacy_hitters[legacy_extra_columns],
                                         on='fg_id')
    combined_hitters.drop_duplicates(subset=['fg_id'], inplace=True)
    # Merge in the ownership
    bbdb = postgres.connect_to_bbdb()
    sos_rosters = pd.read_sql(
        'SELECT fg_id, sos."Team" as sos_team FROM rosters.sos', con=bbdb)
    sos_rosters[['fg_id']] = sos_rosters[['fg_id']].astype(str)
    combined_hitters = combined_hitters.merge(sos_rosters,
                                              how='left',
                                              on='fg_id')
    legacy_rosters = pd.read_sql(
        'SELECT fg_id, legacy."Team" as legacy_team FROM rosters.legacy',
        con=bbdb)
    legacy_rosters[['fg_id']] = legacy_rosters[['fg_id']].astype(str)
    combined_hitters = combined_hitters.merge(legacy_rosters,
                                              how='left',
                                              on='fg_id')
    # Manual de-dup of fg_id 19755 (player rostered under two entries).
    combined_hitters.drop(combined_hitters[
        (combined_hitters['fg_id'] == '19755')
        & (combined_hitters['legacy_team'] == 'Harper Wallbanger')].index,
                          inplace=True)
    # Pitchers
    sos_pitchers = create_combined_pitcher_valuations(league=league_sos) \
        .rename(columns={'zar': 'zar_sos', 'value': 'value_sos'})
    legacy_pitchers = create_combined_pitcher_valuations(league=league_legacy) \
        .rename(columns={'zar': 'zar_legacy', 'value': 'value_legacy'})
    legacy_extra_columns = list(
        set(legacy_pitchers.columns).difference(sos_pitchers.columns))
    legacy_extra_columns = utilities.flatten(['fg_id', legacy_extra_columns])
    columns = [
        'name', 'fg_id', 'type', 'ip', league_sos.pitching_counting_stats,
        league_legacy.pitching_counting_stats, league_sos.pitching_rate_stats,
        league_legacy.pitching_rate_stats, 'zar_sos', 'value_sos',
        'zar_legacy', 'value_legacy'
    ]
    columns = utilities.flatten(columns)
    combined_pitchers = sos_pitchers.merge(
        legacy_pitchers[legacy_extra_columns], on='fg_id')
    combined_pitchers = combined_pitchers[columns]
    # Merge in CFIP
    bbdb = postgres.connect_to_bbdb()
    cfip = pd.read_sql('SELECT * FROM hist.bp_pitchers_raw', con=bbdb)
    combined_pitchers = combined_pitchers.merge(cfip[['fg_id', 'DRA', 'cFIP']],
                                                how='left',
                                                on='fg_id')
    # Merge in xxxFIP
    bbdb = postgres.connect_to_bbdb()
    cfip = pd.read_sql('SELECT * FROM tracking.xxxfip WHERE fg_id IS NOT NULL',
                       con=bbdb)
    combined_pitchers = combined_pitchers.merge(cfip[['fg_id', 'xxxFIP']],
                                                how='left',
                                                on='fg_id')
    # Merge in the ownership
    combined_pitchers = combined_pitchers.merge(sos_rosters,
                                                how='left',
                                                on='fg_id')
    combined_pitchers = combined_pitchers.merge(legacy_rosters,
                                                how='left',
                                                on='fg_id')
    # Same manual de-dup for the pitcher frame (different Legacy team label).
    combined_pitchers.drop(combined_pitchers[
        (combined_pitchers['fg_id'] == '19755')
        & (combined_pitchers['legacy_team'] == 'Florun\'s Team')].index,
                           inplace=True)
    # Update Google Sheets
    gc = gspread.service_account(filename='./bb-2021-2b810d2e3d25.json')
    sh = gc.open("BB 2021 InSeason").worksheet('Proj - Hitters')
    gsdf.set_with_dataframe(sh, combined_hitters)
    gs.format_gsheet(sheet=sh)
    sh = gc.open("BB 2021 InSeason").worksheet('Proj - Pitchers')
    gsdf.set_with_dataframe(sh, combined_pitchers)
    gs.format_gsheet(sheet=sh)
def __init__(self, league_type):
    """Configure league settings for the 'SoS' or 'Legacy' league.

    Sets platform/identity, scoring-category lists, normalized z-score
    weights, the hitting/pitching budget split, and roster sizes.  Silently
    leaves most attributes unset for any other league_type (only the shared
    hitting_stats/pitching_stats lines below would then fail).
    """
    import sys
    sys.path.append('python/general')
    from general import utilities
    self.league_name = league_type
    self.name = self.league_name
    self.year = 2021
    if (league_type == 'SoS'):
        self.league_platform = 'fleaflicker'
        self.league_num = '23172'
        self.num_teams = 16
        self.hitting_counting_stats = ['hr', 'r', 'rbi', 'sb']
        self.hitting_rate_stats = ['obp', 'ops']
        self.hitting_other_stats = ['ab']
        # Nominal (pre-normalization) category weights; the commented dict
        # preserves a previously-tried weighting.
        self.z_weights_nominal_hitting = {
            #'hr':1, 'r':1, 'rbi':1, 'sb':1.1, 'obp':1.3, 'ops':1.2
            'hr': 1,
            'r': 1,
            'rbi': 1,
            'sb': 1,
            'obp': 1.1,
            'ops': 1.1
        }
        self.z_weights_hitting = self.normalize_z_weights(
            self.z_weights_nominal_hitting)
        self.pitching_counting_stats = ['qs', 'so', 'sv', 'hld']
        self.pitching_rate_stats = ['era', 'whip']
        self.pitching_other_stats = ['gs', 'g']
        self.z_weights_nominal_pitching = {
            #'qs':1.2, 'so':1, 'sv':.9, 'hld':.6, 'era':1.2, 'whip':1.2
            'qs': 1,
            'so': 1.1,
            'sv': 1,
            'hld': 1,
            'era': 1,
            'whip': 1
        }
        self.z_weights_pitching = self.normalize_z_weights(
            self.z_weights_nominal_pitching)
        # Single lookup table combining both sides, keyed by stat name.
        self.z_weights = {
            **self.z_weights_hitting,
            **self.z_weights_pitching
        }
        # Share of the auction budget allocated to hitting; rosters allow a
        # half-player of flexibility on each side.
        self.batting_split = .6
        self.hitters_per_team = 12.5
        self.pitchers_per_team = 12.5
    elif (league_type == 'Legacy'):
        self.league_platform = 'yahoo'
        self.league_num = '26574'
        self.num_teams = 12
        self.hitting_counting_stats = ['hr', 'r', 'rbi', 'sb']
        self.hitting_rate_stats = ['obp']
        # NOTE(review): unlike SoS, hitting_other_stats / pitching_other_stats
        # are not set for Legacy — confirm nothing downstream reads them.
        self.z_weights_nominal_hitting = {
            'hr': 1,
            'r': 1,
            'rbi': 1,
            'sb': .2,
            'obp': 1
        }
        self.z_weights_hitting = self.normalize_z_weights(
            self.z_weights_nominal_hitting)
        # Legacy counts innings pitched and combined saves+holds.
        self.pitching_counting_stats = ['ip', 'so', 'svhld']
        self.pitching_rate_stats = ['era', 'whip']
        self.z_weights_nominal_pitching = {
            'ip': 1,
            'so': 1,
            'svhld': .8,
            'era': 1,
            'whip': 1
        }
        self.z_weights_pitching = self.normalize_z_weights(
            self.z_weights_nominal_pitching)
        self.z_weights = {
            **self.z_weights_hitting,
            **self.z_weights_pitching
        }
        self.batting_split = .6
        self.hitters_per_team = 12.5
        self.pitchers_per_team = 12.5
    # Convenience lists used by the data-loading functions (rate stats
    # first, matching their output column order).
    self.hitting_stats = utilities.flatten(
        [self.hitting_rate_stats, self.hitting_counting_stats])
    self.pitching_stats = utilities.flatten(
        [self.pitching_rate_stats, self.pitching_counting_stats])
def create_combined_pitchers(ls):
    """Blend pitcher projections from multiple systems into one line per player.

    Loads razz and FanGraphs depth-chart projections, computes a per-system
    weighted average for each stat (IP gets its own weight table), and
    merges in names and teams.

    Args:
        ls: league-settings object; ls.pitching_stats / pitching_counting_stats
            select the stats to blend.

    Returns:
        DataFrame with one row per fg_id: fg_id, name, team, ip plus
        ls.pitching_stats.
    """
    import pandas as pd
    from munging import player_names
    from general import postgres, utilities
    bbdb = postgres.connect_to_bbdb()
    query = (
        'SELECT \'razz\' as source, fg_id, ip, qs, era, whip, k as so, sv, hld '
        + 'FROM proj.razz_pitchers')
    df_razz = pd.read_sql_query(query, bbdb)
    df_razz['svhld'] = (df_razz['sv'] + df_razz['hld'])
    query = (
        'SELECT \'fg_dc\' as source, fg_id, ip, qs, era, whip, so, sv, hld ' +
        'FROM proj.fg_dc_pitchers ')
    df_fg_dc = pd.read_sql_query(query, bbdb)
    # fg_dc reports 0 QS rather than "no projection"; treat as missing so it
    # doesn't drag the weighted average (re-asserted after the zero-fill below).
    df_fg_dc['qs'] = df_fg_dc['qs'].replace({0: None})
    # BUG FIX: svhld was computed from df_razz's sv/hld columns (copy-paste),
    # silently pairing fg_dc rows with razz values by positional index.
    df_fg_dc['svhld'] = (df_fg_dc['sv'] + df_fg_dc['hld'])
    df = pd.concat([df_razz, df_fg_dc])
    df_ip = df[['source', 'fg_id', 'ip']]
    query_teams = 'SELECT playerid as fg_id, fg_dc_pitchers_raw."Team" as team FROM proj.fg_dc_pitchers_raw'
    df_teams = pd.read_sql_query(query_teams, bbdb)
    # Treat missing counting stats as zero so they average rather than drop.
    for var in ls.pitching_counting_stats:
        df[var] = df.apply(lambda row: 0 if pd.isna(row[var]) else row[var],
                           axis=1)
    # Per-system blend weights for most stats...
    weights = {
        'system': ['fg_dc', 'thebat', 'thebatx', 'pod', 'razz'],
        'sys_weight': [1, 1, 1.2, 0, .01]
    }
    weights = pd.DataFrame(weights)
    df = df.merge(right=weights,
                  how='left',
                  left_on='source',
                  right_on='system')
    # ...and a separate table for IP, which trusts fg_dc playing time most.
    weights_ip = {
        'system': ['fg_dc', 'thebat', 'thebatx', 'pod', 'razz'],
        'sys_weight': [.25, 0, 0, 0, .01]
    }
    weights_ip = pd.DataFrame(weights_ip)
    df_ip = df_ip.merge(right=weights_ip,
                        how='left',
                        left_on='source',
                        right_on='system')

    def weighted_average(df, data_col, weight_col, by_col):
        # Weighted mean of data_col per by_col group, ignoring NaN values
        # (their weight is excluded from the denominator).  Temporary
        # columns are removed before returning.
        df['_data_times_weight'] = df[data_col] * df[weight_col]
        df['_weight_where_notnull'] = df[weight_col] * pd.notnull(df[data_col])
        g = df.groupby(by_col)
        result = g['_data_times_weight'].sum(
        ) / g['_weight_where_notnull'].sum()
        del df['_data_times_weight'], df['_weight_where_notnull']
        result = pd.DataFrame(result, columns=[data_col])
        return result

    # The zero-fill above re-zeroed fg_dc QS; force it back to missing so
    # fg_dc contributes no QS signal to the blend.
    df.loc[df['source'] == 'fg_dc', 'qs'] = None
    combined_pitchers = pd.DataFrame(df_ip['fg_id'].unique(),
                                     columns=['fg_id'])
    # list(set(...)) to get unique values b/c ip may be in there twice.
    statlist = list(set(utilities.flatten([['ip'], ls.pitching_stats])))
    for stat in statlist:
        # IP uses its own weight table; everything else the general one.
        if stat == 'ip':
            t = weighted_average(df_ip, stat, 'sys_weight', 'fg_id')
        else:
            t = weighted_average(df, stat, 'sys_weight', 'fg_id')
        combined_pitchers = combined_pitchers.merge(t, on='fg_id')
    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_pitchers = combined_pitchers.merge(names[['fg_id', 'name']],
                                                on='fg_id',
                                                how='left')
    combined_pitchers = combined_pitchers.merge(df_teams,
                                                on='fg_id',
                                                how='left')
    output_stats = ['fg_id', 'name', 'team', 'ip']
    for stat in ls.pitching_stats:
        if stat not in output_stats:
            output_stats.append(stat)
    combined_pitchers = combined_pitchers[output_stats]
    return combined_pitchers
def create_combined_hitters(ls, pa=0):
    """Blend hitter projections from multiple systems into one line per player.

    Counting stats are stored per-PA in the projection tables, so the blend
    averages the rates and then multiplies by blended (or forced) PA.

    Args:
        ls: league-settings object; its hitting stat lists select columns.
        pa: if > 0, override every player's PA with this value (used for the
            600-PA what-if valuation).

    Returns:
        DataFrame with one row per fg_id: fg_id, name, team, pa plus
        ls.hitting_stats.
    """
    import pandas as pd
    from general import utilities
    from general import postgres
    from munging import player_names
    bbdb = postgres.connect_to_bbdb()
    # Per-PA rates from four projection systems, stacked with a source tag.
    query = (
        'SELECT proj.* FROM (' +
        'SELECT \'fg_dc\' as source, fg_id, pa, hr_pa, r_pa, rbi_pa, sb_pa, obp, ops '
        + 'FROM proj.fg_dc_batters ' + 'UNION ' +
        'SELECT \'thebat\' as source, fg_id, pa, hr_pa, r_pa, rbi_pa, sb_pa, obp, ops '
        + 'FROM proj.thebat_batters ' + 'UNION ' +
        'SELECT \'thebatx\' as source, fg_id, pa, hr_pa, r_pa, rbi_pa, sb_pa, obp, ops '
        + 'FROM proj.thebatx_batters ' + 'UNION ' +
        'SELECT \'pod\' as source, fg_id, pa, hr_pa, r_pa, rbi_pa, sb_pa, obp, ops '
        + 'FROM proj.pod_batters ' + ') AS proj')
    df = pd.read_sql_query(query, bbdb)
    # Playing time comes from fg_dc only (pod branch disabled).
    query_pa = (
        'SELECT proj.* FROM (' +
        'SELECT \'fg_dc\' as source, fg_id, pa ' +
        'FROM proj.fg_dc_batters ' +
        #'UNION ' +
        #'SELECT \'pod\' as source, fg_id, pa ' +
        #'FROM proj.pod_batters ' +
        ') AS proj')
    df_pa = pd.read_sql_query(query_pa, bbdb)
    # Remap a FanGraphs minor-league placeholder id to the real fg_id.
    df_pa.loc[df_pa['fg_id'] == 'sa3011918', 'fg_id'] = '27506'
    query_teams = 'SELECT playerid as fg_id, fg_dc_batters_raw."Team" as team FROM proj.fg_dc_batters_raw'
    df_teams = pd.read_sql_query(query_teams, bbdb)
    df_teams.loc[df_teams['fg_id'] == 'sa3011918', 'fg_id'] = '27506'
    # Per-system blend weights for the rate stats...
    weights = {
        'system': ['fg_dc', 'thebat', 'thebatx', 'pod'],
        'sys_weight': [1, 1, 1.2, .6]
    }
    weights = pd.DataFrame(weights)
    df = df.merge(right=weights,
                  how='left',
                  left_on='source',
                  right_on='system')
    # ...and a separate table for PA (effectively fg_dc only).
    weights_pa = {'system': ['fg_dc', 'pod'], 'sys_weight': [1, 0]}
    weights_pa = pd.DataFrame(weights_pa)
    df_pa = df_pa.merge(right=weights_pa,
                        how='left',
                        left_on='source',
                        right_on='system')

    def weighted_average(df, data_col, weight_col, by_col):
        # Weighted mean of data_col per by_col group, excluding NaNs from
        # the denominator; temp columns removed before returning.
        df['_data_times_weight'] = df[data_col] * df[weight_col]
        df['_weight_where_notnull'] = df[weight_col] * pd.notnull(df[data_col])
        g = df.groupby(by_col)
        result = g['_data_times_weight'].sum(
        ) / g['_weight_where_notnull'].sum()
        del df['_data_times_weight'], df['_weight_where_notnull']
        result = pd.DataFrame(result, columns=[data_col])
        return result

    combined_hitters = pd.DataFrame(df_pa['fg_id'].unique(),
                                    columns=['fg_id'])
    for stat in ['pa']:
        t = weighted_average(df_pa, stat, 'sys_weight', 'fg_id')
        combined_hitters = combined_hitters.merge(t, on='fg_id')
    # Optional playing-time override (e.g. pa=600 for neutral valuations).
    if (pa > 0):
        combined_hitters['pa'] = pa
    # Blend the per-PA counting rates and the rate stats, then convert the
    # *_pa columns back to counting totals using the (possibly overridden) PA.
    stats_pa = []
    for stat in ls.hitting_counting_stats:
        stats_pa.append(stat + '_pa')
    for stat in utilities.flatten([stats_pa, ls.hitting_rate_stats]):
        t = weighted_average(df, stat, 'sys_weight', 'fg_id')
        combined_hitters = combined_hitters.merge(t, on='fg_id')
    for stat in ls.hitting_counting_stats:
        stat_pa = stat + '_pa'
        combined_hitters[
            stat] = combined_hitters[stat_pa] * combined_hitters['pa']
        combined_hitters = combined_hitters.drop(columns=[stat_pa])
    # merge in the names and reorder
    names = player_names.get_player_names()
    combined_hitters = combined_hitters.merge(names[['fg_id', 'name']],
                                              on='fg_id',
                                              how='left')
    combined_hitters = combined_hitters.merge(df_teams,
                                              on='fg_id',
                                              how='left')
    output_stats = utilities.flatten([['fg_id', 'name', 'team', 'pa'],
                                      [ls.hitting_stats]])
    combined_hitters = combined_hitters[output_stats]
    return combined_hitters