def main():
    """Build the college players CSV described by ``college_players_build.json``.

    Reads the configured source CSV, derives ``full_name``,
    ``position_group`` and ``section``, renames and re-orders the columns,
    normalises ``college`` through the two matching dictionaries and writes
    the result to the configured output file.
    """
    school_matching = hlp.return_college_matching_dict()
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "college_players_build.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])
    source = os.path.join(source_dir, data['folder'], data['file'])
    df = pd.read_csv(source)

    df['full_name'] = df[['first_name', 'last_name']].astype(str).apply(' '.join, axis=1)
    df['position_group'] = df['position'].map(matching['position_groups'])
    df['section'] = df['position_group'].map(matching['section'])
    df.rename(columns=data['column_rename'], inplace=True)
    # Copy so the assignment below writes to an independent frame, not a slice.
    df = df[data['column_order']].copy()

    # Two-step normalisation: a name missing from a dictionary keeps the value
    # it had going into that step, so the school-level match is not thrown
    # away when the second dictionary has no entry for it.
    df['college'] = df['college'].map(school_matching).fillna(df['college'])
    df['college'] = df['college'].map(matching['college']).fillna(df['college'])

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def add_espn_id():
    """Add ESPN ids to the madden output file via a fuzzy merge.

    The CSV at the configured target/output location is read, fuzzy-merged
    with the id frame from ``hlp.return_id_df`` on first name, last name and
    position group, cut down to ``data['id_column_order']`` and written back
    to the same path.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "madden_build.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    # Input and output both live under data['target'].
    target_dir = os.path.join(two_up, data['target'])  # should work in both mac and windows
    target_folder = os.path.join(target_dir, data['output_folder'])
    target = os.path.join(target_folder, data['output_file'])

    df = pd.read_csv(target)
    merge_keys = ['first_name', 'last_name', 'position_group']
    espn_id_df = hlp.return_id_df(merge_keys + ['espn_id'])
    print("fuzzy merging madden outputs")
    df = cm.fuzzy_merge(df, espn_id_df, merge_keys, list(merge_keys),
                        threshold=95, limit=1)
    df = df[data['id_column_order']]

    hlp.make_folder_if_not_exists(target_folder)
    df.to_csv(target, index=False)
def main():
    """Build the city facts CSV described by ``facts_cities.json``.

    Inner-joins the economics input with the selected weather columns on
    ``fms_city_id``, keeps ``data['keep_columns']``, drops duplicate city ids
    (last occurrence wins) and writes the configured output file.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "facts_cities.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])
    target_dir = os.path.join(two_up, data['target'])

    econ_cfg = data['econ_input']
    df = pd.read_csv(os.path.join(source_dir, econ_cfg['folder'], econ_cfg['file']))

    weather_cfg = data['weather_input']
    weather_df = pd.read_csv(
        os.path.join(source_dir, weather_cfg['folder'], weather_cfg['file']))
    weather_df = weather_df[data['weather_keep_columns']]

    df = df.merge(weather_df, on=['fms_city_id'], how='inner')
    df = df[data['keep_columns']]
    df = df.drop_duplicates(subset='fms_city_id', keep='last')

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the players dimension CSV from ``dimensions_players_build.json``.

    Starts from the draft CSV, left-joins selected college player columns on
    ``espn_id`` and selected combine columns on ``fms_id``, renames columns,
    drops duplicate ``fms_id`` rows (last wins), attaches the college id
    frame from ``hlp.return_fms_college_id`` on ``college`` and writes the
    columns listed in ``data['column_order']``.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "dimensions_players_build.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    source = os.path.join(source_dir, data['draft']['folder'], data['draft']['file'])
    df = pd.read_csv(source)

    ### Read college players in, including hometown ###
    source = os.path.join(source_dir, data['college_players']['folder'],
                          data['college_players']['file'])
    df_players = pd.read_csv(source)
    df_players = df_players[data['college_players_keep']]
    # Left join: every draft row is kept even without a college player match.
    df = pd.merge(df, df_players, on='espn_id', how='left')

    # NOTE(review): the combine stats file is looked up in data['draft']['folder'],
    # not data['combine_stats']['folder'] -- kept as in the original; confirm intent.
    source = os.path.join(source_dir, data['draft']['folder'],
                          data['combine_stats']['file'])
    df_combine = pd.read_csv(source)
    df_combine = df_combine[data['combine_stats_keep']]
    df = pd.merge(df, df_combine, on='fms_id', how='left')

    df.rename(columns=data['column_rename'], inplace=True)
    df = df.drop_duplicates(subset='fms_id', keep='last')

    df_college_id = hlp.return_fms_college_id()
    df = df.merge(df_college_id, on='college', how='left')
    df = df[data['column_order']]

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Combine the yearly madden rating files into one CSV.

    Each file in ``data['file_list']`` is read, renamed, tagged with its
    year and position group, trimmed to ``data['columns']`` and given a
    ``<year>_madden_rating`` column. The first file seeds the result; each
    later file is fuzzy-merged against the running result on first name,
    last name and position group, after which duplicates on those keys are
    dropped (last wins). Finally ``section`` is added and the configured
    column order is written to the output file.
    """
    print("got to main madden build")
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "madden_build.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()  # get global matching dictionary

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    name_keys = ['first_name', 'last_name', 'position_group']
    df = pd.DataFrame(columns=data['columns'])

    for file_number, file in enumerate(data['file_list']):
        source = os.path.join(source_dir, file['folder'], file['file'])
        year = data['year'][file['file']]

        temp_df = pd.read_csv(source)
        temp_df.rename(columns=data['column_rename'], inplace=True)
        temp_df['year'] = year  # add year
        temp_df['position_group'] = temp_df['position'].map(
            matching['position_groups'])
        # cut all extra columns; copy so the new column is not set on a slice
        temp_df = temp_df[data['columns']].copy()
        temp_df[str(year) + "_madden_rating"] = temp_df['madden_rating']

        if file_number == 0:
            # first one will not be matched
            # DataFrame.append was removed in pandas 2.0; concat replaces it.
            df = pd.concat([df, temp_df])
        else:
            df_1 = cm.fuzzy_merge(df, temp_df, name_keys, list(name_keys),
                                  threshold=95, limit=1)
            df_2 = pd.concat([temp_df, df_1])
            df = pd.concat([df, df_2])
            # NOTE(review): de-duplication is applied only after a merge step
            # (not to the very first file) -- confirm against the original layout.
            df = df.drop_duplicates(subset=name_keys, keep='last')

    df['section'] = df['position_group'].map(matching['section'])
    df.rename(columns=data['column_rename'], inplace=True)
    print(df.columns)
    df = df[data['column_order']]

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the combine stats CSV described by ``combine_stats_build.json``.

    Reads the combine CSV, maps ``college`` through the school matching
    dictionary, splits ``player`` into first/last name (first and second
    space-separated tokens), derives ``position_group`` and ``section``,
    left-joins the ESPN id frame and the fms id frame, normalises
    ``college`` and writes the configured columns.
    """
    school_matching = hlp.return_college_matching_dict()
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "combine_stats_build.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])
    source = os.path.join(source_dir, data['folder'], data['file'])
    df = pd.read_csv(source)

    # Reuse the dictionary loaded above instead of loading it a second time.
    # Colleges absent from the dictionary become NaN here (no fallback).
    df['college'] = df['college'].map(school_matching)
    name_parts = df['player'].str.split(' ')
    df['first_name'] = name_parts.str[0]
    df['last_name'] = name_parts.str[1]
    df['position_group'] = df['pos'].map(matching['position_groups'])
    df['section'] = df['position_group'].map(matching['section'])
    df.rename(columns=data['column_rename'], inplace=True)

    espn_id_df = hlp.return_id_df()
    master_df = hlp.return_fms_id_df()
    df = pd.merge(df, espn_id_df,
                  on=['last_name', 'college', 'position_group'], how='left')
    df = pd.merge(df, master_df,
                  on=['first_name', 'last_name', 'college', 'position_group'],
                  how='left')

    # Copy so the assignments below write to an independent frame, not a slice.
    df = df[data['column_order']].copy()
    # Two-step normalisation: a name missing from a dictionary keeps the value
    # it had going into that step.
    df['college'] = df['college'].map(school_matching).fillna(df['college'])
    df['college'] = df['college'].map(matching['college']).fillna(df['college'])

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the city economics CSV from ``city_economics_build.json``.

    Stacks the hometown and college-town economics CSVs (after renaming
    columns), attaches city ids from ``hlp.return_fms_city_id`` on
    ``city_state``, converts the configured numerical columns with
    ``hlp.currency_to_float``, keeps ``data['column_keep']``, drops duplicate
    ``fms_city_id`` rows (last wins) and writes the output file.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "city_economics_build.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    # 'homewtown_econ' is the key spelling used by the config file.
    source = os.path.join(source_dir, data['homewtown_econ']['folder'],
                          data['homewtown_econ']['file'])
    df = pd.read_csv(source)
    df.rename(columns=data['column_rename'], inplace=True)

    source = os.path.join(source_dir, data['collegetown_econ']['folder'],
                          data['collegetown_econ']['file'])
    college_town_df = pd.read_csv(source)
    college_town_df.rename(columns=data['column_rename'], inplace=True)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat([df, college_town_df], ignore_index=True)

    city_df = hlp.return_fms_city_id()
    df = df.merge(city_df, on='city_state', how='left')

    for column in data['numerical_columns']:
        # convert currency to float, remove $ and ,
        df[column] = df[column].apply(hlp.currency_to_float)

    df = df[data['column_keep']]
    df = df.drop_duplicates(subset='fms_city_id', keep='last')

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the player master CSV described by ``player_master.json``.

    Reads the source CSV, maps ``college`` through the school matching
    dictionary, splits ``player`` into first/last name (first and second
    space-separated tokens), derives ``position_group`` and ``section``,
    left-joins the ESPN id frame, assigns an integer ``fms_id`` from the
    category codes of a name/position key and writes ``data['keep_columns']``.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "player_master.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()
    school_matching = hlp.return_college_matching_dict()

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])
    source = os.path.join(source_dir, data['folder'], data['file'])
    df = pd.read_csv(source)

    # Colleges absent from the dictionary become NaN here (no fallback).
    df['college'] = df['college'].map(school_matching)
    name_parts = df['player'].str.split(' ')
    df['first_name'] = name_parts.str[0]
    df['last_name'] = name_parts.str[1]
    df['position_group'] = df['pos'].map(matching['position_groups'])
    df['section'] = df['position_group'].map(matching['section'])
    df.rename(columns=data['column_rename'], inplace=True)

    espn_id_df = hlp.return_id_df()
    df = pd.merge(df, espn_id_df,
                  on=['last_name', 'college', 'position_group'], how='left')

    # NOTE(review): the last component is the literal text "draft_year", not a
    # column value -- kept byte-for-byte so generated ids do not change;
    # confirm whether a draft year column was intended.
    id_key = (df['first_name'] + '_' + df['last_name'] + '_' +
              df['position_group'] + '_' + "draft_year")
    df = df.assign(fms_id=id_key.astype('category').cat.codes)
    df = df[data['keep_columns']]

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the colleges dimension CSV from ``dimensions_colleges_build.json``.

    Reads the configured colleges CSV, drops duplicate ``fms_college_id``
    rows (last wins) and writes all columns to the configured output file.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "dimensions_colleges_build.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    colleges_cfg = data['colleges']
    source = os.path.join(source_dir, colleges_cfg['folder'], colleges_cfg['file'])
    df = pd.read_csv(source)
    # No column selection is applied here; every input column is written out.
    df = df.drop_duplicates(subset='fms_college_id', keep='last')

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the college economics CSV from ``college_economics_build.json``.

    Reads the source CSV, renames and keeps the configured columns, converts
    the numerical columns with ``hlp.currency_to_float``, normalises
    ``college`` through the two matching dictionaries, attaches college ids
    from ``hlp.return_fms_college_id`` and writes ``data['column_order']``.
    """
    school_matching = hlp.return_college_matching_dict()
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "college_economics_build.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])
    source = os.path.join(source_dir, data['folder'], data['file'])
    df = pd.read_csv(source)

    df.rename(columns=data['column_rename'], inplace=True)
    # Copy so the column assignments below do not write to a slice.
    df = df[data['column_keep']].copy()

    for column in data['numerical_columns']:
        # convert currency to float, remove $ and ,
        df[column] = df[column].apply(hlp.currency_to_float)

    # Names missing from a dictionary keep the value they had going in.
    df['college'] = df['college'].map(school_matching).fillna(df['college'])
    df['college'] = df['college'].map(matching['college']).fillna(df['college'])

    master_college_df = hlp.return_fms_college_id()
    df = df.merge(master_college_df, on='college', how='left')
    df = df[data['column_order']]

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the college metrics facts CSV from ``facts_college_metrics.json``.

    Takes the ``fms_college_id`` column of the colleges dimension, left-joins
    the college budget CSV on that id, renames columns and writes the result.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "facts_college_metrics.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    # The dimension file path is resolved from the project root, not data['source'].
    source = os.path.join(two_up, data['dimension_colleges']['folder'],
                          data['dimension_colleges']['file'])
    # Double brackets keep a one-column DataFrame so merge gets a frame on both sides.
    df = pd.read_csv(source)[['fms_college_id']]

    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])
    source = os.path.join(source_dir, data['college_budget']['folder'],
                          data['college_budget']['file'])
    college_budget_df = pd.read_csv(source)

    df = pd.merge(df, college_budget_df, on='fms_college_id', how='left')
    df.rename(columns=data['column_rename'], inplace=True)
    # Column re-ordering via data['column_order'] and per-column z-scores
    # (data['z_score_columns']) were disabled in the original and remain so.

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the running back reporting CSV described by ``reporting_rb.json``.

    Filters the players dimension to rows whose ``position`` contains "RB",
    left-joins college, player-metric, college-metric, city-metric and
    conference data, derives ratio, per-game and conference-scaled columns,
    adds ``rb_<col>_zscore`` columns (population standard deviation) while
    recording each column's statistics through
    ``hlp.write_representative_statistics``, and writes
    ``data['column_order']`` to the output file.
    """
    prefix = "rb_"
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "reporting_rb.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    target_dir = os.path.join(two_up, data['target'])

    def read_table(config_key):
        """Read the CSV whose folder/file are listed under ``data[config_key]``."""
        entry = data[config_key]
        return pd.read_csv(os.path.join(two_up, entry['folder'], entry['file']))

    df = read_table('dimension_players')
    # na=False: a missing position is treated as "not a running back" instead
    # of producing a NaN in the boolean mask, which cannot be used for indexing.
    df = df[df['position'].str.contains("RB", na=False)]

    ### get city IDs for colleges
    # Read once; both the city-id and the conference columns come from this file.
    colleges_df = read_table('dimension_colleges')
    college_city_df = colleges_df[data['dimension_colleges_keep_columns']]

    ### merge dimension players and dimension colleges ###
    df = pd.merge(df, college_city_df, on='fms_college_id', how='left')
    df = df.drop_duplicates(subset='fms_id', keep='last')

    ### player stats ###
    player_stats_df = read_table('facts_player_metrics')
    df = pd.merge(df, player_stats_df, on='fms_id', how='left')
    df = df.drop_duplicates(subset='fms_id', keep='last')

    ### college stats ###
    college_stats_df = read_table('facts_college_metrics')
    college_stats_df = college_stats_df[data['college_stats_keep_columns']]
    college_stats_df = college_stats_df.groupby('fms_college_id').mean().reset_index()
    df = pd.merge(df, college_stats_df, on='fms_college_id', how='left')
    df = df.drop_duplicates(subset='fms_id', keep='last')

    ### city stats ###
    city_stats_df = read_table('facts_cities_metrics')
    df = pd.merge(df, city_stats_df, on='fms_city_id', how='left')
    df = df.drop_duplicates(subset='fms_id', keep='last')

    ### add conference ###
    conference_df = colleges_df[data['conference_keep_columns']]
    df = pd.merge(df, conference_df, on='fms_college_id', how='left')
    df = df.drop_duplicates(subset='fms_id', keep='last')

    ### math transformations ###
    df['hw_ratio'] = df['college_height_inches'] / df['college_weight_pounds']
    # Conferences missing from the scale table get 0.7.
    df['conference_scale'] = (df['conference'].map(data['conference_scale'])
                              .fillna(0.7).astype(float))

    for column in data['per_game_columns']:
        new_name = str(column) + '_pg'
        df[new_name] = df[column] / df['rushing_games']
        df[new_name + "_cf_scaled"] = df[new_name] * df['conference_scale']

    ### apply z score ###
    for col in data['z_score_columns']:
        col_zscore = prefix + col + '_zscore'
        mean = df[col].mean()
        stdev = df[col].std(ddof=0)
        # Named col_min/col_max so the builtins min() and max() are not shadowed.
        col_min = df[col].min()
        col_max = df[col].max()
        df[col_zscore] = (df[col] - mean) / stdev
        hlp.write_representative_statistics(col_zscore, mean, stdev,
                                            col_min, col_max)

    df = df[data['column_order']]

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the college master CSV described by ``college_master.json``.

    Loads five college-keyed sources (combine, weather, economics, budget,
    conferences), normalises their ``college`` names, builds a golden list
    of colleges with ``gld.golden_source_merge``, remaps every source onto
    it, left-joins the sources one by one, derives ``city``/``state`` from
    ``city_state``, assigns ``fms_college_id`` from category codes, attaches
    city ids and writes ``data['keep_columns']``. The golden-source name
    mapping is also stored under ``matching['college']`` via
    ``hlp.write_matching_dict``.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "college_master.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()
    school_matching = hlp.return_college_matching_dict()

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    def load_source(source_key, rename_key=None, keep_key=None):
        """Read one source CSV, one row per college, with school names mapped."""
        entry = data[source_key]
        frame = pd.read_csv(os.path.join(source_dir, entry['folder'], entry['file']))
        if rename_key is not None:
            frame.rename(columns=data[rename_key], inplace=True)
        if keep_key is not None:
            frame = frame[data[keep_key]]
        frame = frame.drop_duplicates(subset='college').reset_index(drop=True)
        frame['college'] = frame['college'].map(school_matching).fillna(
            frame['college'])
        return frame

    # Order matters: it is the order of the golden-source merge and of the joins.
    sources_list = [
        load_source('combine', keep_key='combine_keep'),  # pull in combine
        load_source('college_weather', 'college_weather_rename',
                    'college_weather_keep'),  # pull in college weather
        load_source('college_econ', 'college_econ_rename',
                    'college_econ_keep'),  # pull in college econ
        load_source('college_budget', 'college_budget_rename',
                    'college_budget_keep'),  # pull in college funding data
        load_source('conferences'),  # pull in conferences
    ]

    df, matching_dict = gld.golden_source_merge(sources_list, ['college'], 98)
    # hand jam Texas so it doesn't match with Texas College
    matching_dict['Texas'] = 'Texas'

    # remap names: golden-source mapping first, then the stored college
    # mapping (as loaded, before it is replaced further down)
    for frame in sources_list:
        frame['college'] = frame['college'].map(matching_dict).fillna(
            frame['college'])
        frame['college'] = frame['college'].map(matching['college']).fillna(
            frame['college'])

    for frame in sources_list:
        df = df.merge(frame, how='left', on='college').drop_duplicates(
            subset='college').reset_index(drop=True)

    # Non-string city_state values (e.g. missing) yield empty city/state.
    df['city'] = df['city_state'].apply(
        lambda x: x.split(',')[0] if isinstance(x, str) else "")
    df['state'] = df['city_state'].apply(
        lambda x: x.split(',')[1] if isinstance(x, str) else "")
    df = df.assign(fms_college_id=df['college'].astype('category').cat.codes)

    geo_df = hlp.return_fms_city_id()
    df = df.merge(geo_df, on=['city_state'], how='left')
    df = df[data['keep_columns']]

    # Replace the stored college mapping with the one produced by this run.
    matching['college'] = matching_dict
    hlp.write_matching_dict(matching)

    # drop duplicates
    df = df.drop_duplicates(subset='fms_college_id', keep='last')

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the geo master CSV described by ``geo_master.json``.

    Loads college cities and hometowns, builds a golden list of
    ``city_state`` values with ``gld.golden_source_merge``, remaps both
    sources onto it, left-joins them, coalesces latitude/longitude (hometown
    values first), derives ``city``/``state``, assigns ``fms_city_id`` from
    category codes and writes ``data['keep_columns']``. The golden-source
    mapping is also stored under ``matching['cities']`` via
    ``hlp.write_matching_dict``.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "geo_master.json")) as f:
        data = json.load(f)
    matching = hlp.return_matching_dict()

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    # pull in colleges
    source = os.path.join(source_dir, data['colleges']['folder'],
                          data['colleges']['file'])
    college_cities_df = pd.read_csv(source)
    # Non-inplace rename returns a new frame, so nothing is written to a slice.
    college_cities_df = college_cities_df[data['college_keep']].rename(
        columns=data["college_df_rename"])

    source = os.path.join(source_dir, data['hometowns']['folder'],
                          data['hometowns']['file'])
    hometown_df = pd.read_csv(source)
    hometown_df = hometown_df[data['hometowns_keep']].rename(
        columns=data["hometown_df_rename"])

    sources_list = [college_cities_df, hometown_df]
    df, matching_dict = gld.golden_source_merge(sources_list, ['city_state'], 98)

    hometown_df['city_state'] = hometown_df['city_state'].map(
        matching_dict).fillna(hometown_df['city_state'])
    college_cities_df['city_state'] = college_cities_df['city_state'].map(
        matching_dict).fillna(college_cities_df['city_state'])

    df = df.merge(hometown_df, how='left', on='city_state')
    df = df.merge(college_cities_df, how='left', on='city_state')
    # _x columns come from the hometown merge, _y from the college merge.
    df['latitude'] = df['latitude_x'].combine_first(df['latitude_y'])
    df['longitude'] = df['longitude_x'].combine_first(df['longitude_y'])

    df['city'] = df['city_state'].apply(lambda x: x.split(',')[0])
    df['state'] = df['city_state'].apply(lambda x: x.split(',')[1])
    df = df.assign(fms_city_id=df['city_state'].astype('category').cat.codes)
    df['country'] = ""  # to be filled in later
    df = df[data['keep_columns']]

    # Replace the stored cities mapping with the one produced by this run.
    matching['cities'] = matching_dict
    hlp.write_matching_dict(matching)

    # drop duplicates
    df = df.drop_duplicates(subset='fms_city_id', keep='last')

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)
def main():
    """Build the player metrics facts CSV from ``facts_player_metrics.json``.

    Takes the ``fms_id`` column of the players dimension and left-joins, on
    ``fms_id``, the madden ratings (plus a ``max_madden`` row maximum that
    ignores NaN), the combine stats and the college stats. Each source is
    first de-duplicated on ``fms_id`` (last wins). Columns are then renamed
    and written in ``data['column_order']``.
    """
    local_path = os.path.dirname(os.path.abspath(__file__))
    # Context manager so the config file handle is closed right after parsing.
    with open(os.path.join(local_path, "facts_player_metrics.json")) as f:
        data = json.load(f)

    two_up = os.path.abspath(os.path.join(local_path, "../.."))
    # The dimension file path is resolved from the project root, not data['source'].
    source = os.path.join(two_up, data['dimension_players']['folder'],
                          data['dimension_players']['file'])
    # Double brackets keep a one-column DataFrame so merge gets a frame on both sides.
    df = pd.read_csv(source)[['fms_id']]

    source_dir = os.path.join(two_up, data['source'])  # should work in both mac and windows
    target_dir = os.path.join(two_up, data['target'])

    ### madden stats ###
    source = os.path.join(source_dir, data['madden_ratings']['folder'],
                          data['madden_ratings']['file'])
    madden_df = pd.read_csv(source)
    madden_df = madden_df[data["madden_keep_pre"]]
    # drop duplicates, need to fix this later
    madden_df = madden_df.drop_duplicates(subset='fms_id', keep='last')
    # Row-wise maximum over every kept column except the id, skipping NaN.
    rating_columns = madden_df.columns.difference(['fms_id'])
    madden_df['max_madden'] = np.nanmax(madden_df[rating_columns].values, axis=1)
    madden_df = madden_df[data['madden_keep_post']]
    df = pd.merge(df, madden_df, on='fms_id', how='left')

    ### combine stats ###
    source = os.path.join(source_dir, data['combine_stats']['folder'],
                          data['combine_stats']['file'])
    combine_df = pd.read_csv(source)
    # drop duplicates, need to fix this later
    combine_df = combine_df.drop_duplicates(subset='fms_id', keep='last')
    df = pd.merge(df, combine_df, on='fms_id', how='left')

    ### college stats ###
    source = os.path.join(source_dir, data['college_stats']['folder'],
                          data['college_stats']['file'])
    df_college_stats = pd.read_csv(source)
    # drop duplicates, need to fix this later
    df_college_stats = df_college_stats.drop_duplicates(subset='fms_id',
                                                        keep='last')
    df = pd.merge(df, df_college_stats, on='fms_id', how='left')  # left join

    df.rename(columns=data['column_rename'], inplace=True)
    df = df[data['column_order']]

    target_folder = os.path.join(target_dir, data['output_folder'])
    hlp.make_folder_if_not_exists(target_folder)
    target = os.path.join(target_folder, data['output_file'])
    df.to_csv(target, index=False)