def scrape_pages(driver, env, team_rosters, players):
    """Scrape stat tables for every resolvable player and group the results.

    For each row of ``players`` the function resolves a team id, builds a
    ``Player``, loads the player's page through ``driver`` and feeds the
    stat tables to ``get_stats``. Rows are skipped when ``bref_id`` is the
    empty string, or when the player's ``bref_name`` is found neither in
    ``team_rosters`` nor in the module-level ``TRANSLATIONS`` mapping.

    Args:
        driver: Object exposing ``get(url)``; also handed to ``get_stats``.
            Presumably a Selenium WebDriver -- confirm against the caller.
        env: Mapping providing ``'MY_PLAYERS'`` (looked up by
            ``espn_name`` via ``.get``) and ``'BR_URL'`` (prefix the
            player's ``bref_id`` is appended to).
        team_rosters: Mapping from roster name to team id, queried with
            ``.get``.
        players: Object whose ``iterrows()`` yields ``(index, row)`` pairs;
            each row is read by the keys ``'espn_name'``, ``'bref_name'``
            and ``'bref_id'`` and has ``'team_id'`` assigned on it.

    Returns:
        A dict with the keys ``'batter_list'`` and ``'pitcher_list'``, each
        holding the ``to_dict()`` output of the players scraped.

    Side effects:
        Marks matched roster names in the module-level
        ``TEAM_ROSTER_NAMES_FOUND`` and prints one progress line per
        scraped player.
    """
    # Tables read for every player, in this order.
    common_xpaths = (
        '//*[@id="total"]/tbody/tr',
        '//*[@id="plato"]/tbody/tr',
    )
    # Extra table read only when the player is not flagged as a batter.
    non_batter_xpath = '//*[@id="total_extra"]/tbody/tr'
    header_cell_xpath = './/th[1]'

    scraped = {
        'batter_list': [],
        'pitcher_list': [],
    }

    for idx, player in players.iterrows():
        owned_entry = env['MY_PLAYERS'].get(player['espn_name'])
        name = player['bref_name']
        team_id = team_rosters.get(name)

        if player['bref_id'] == '':
            continue

        # Resolve which roster key this player matched: the name itself,
        # or its entry in TRANSLATIONS when the direct lookup missed.
        roster_key = name
        if team_id is None:
            roster_key = TRANSLATIONS.get(name)
            if roster_key is None:
                continue
            # May still be None if the translated name is not on a roster.
            team_id = team_rosters.get(roster_key)
        player['team_id'] = team_id
        TEAM_ROSTER_NAMES_FOUND[roster_key] = True

        # NOTE(review): the trade override only rebinds the local team_id;
        # it is never written to player['team_id'] and is not read again
        # below. It looks like it only served the disabled debug filter
        # (`if team_id != 8: continue`) -- confirm whether traded players
        # are meant to carry the TRADES team id in the output.
        traded_team_id = TRADES.get(name)
        if traded_team_id is not None:
            team_id = traded_team_id

        print('Scraping Player: ' + name + ' index: ' + str(idx) + '\n\n')

        current = Player(player)
        if owned_entry is not None:
            current.mine = True

        # Baseball Reference
        page_url = env['BR_URL'] + current.bref_id
        if not current.batter:
            page_url += '&t=p'
        driver.get(page_url)

        for row_xpath in common_xpaths:
            current = get_stats(
                driver, 'BR', row_xpath, header_cell_xpath, current)
        # Re-read .batter here: get_stats returns the player object, so the
        # flag is checked on whatever it handed back.
        if not current.batter:
            current = get_stats(
                driver, 'BR', non_batter_xpath, header_cell_xpath, current)

        bucket = 'batter_list' if current.batter else 'pitcher_list'
        scraped[bucket].append(current.to_dict())

    return scraped