コード例 #1
0
def scrape_pages(driver, env, team_rosters, players):
    """Scrape Baseball Reference stat tables for every rostered player.

    Rows with an empty ``bref_id`` are skipped, as are players whose
    ``bref_name`` is neither a key of ``team_rosters`` nor a key of the
    module-level ``TRANSLATIONS`` mapping.

    Args:
        driver: Browser driver exposing ``get(url)``; it is also handed to
            ``get_stats`` to read the loaded page (presumably a Selenium
            WebDriver -- confirm against the caller).
        env: Mapping providing ``'MY_PLAYERS'`` (looked up by ESPN name) and
            ``'BR_URL'`` (prefix to which the player's bref id is appended).
        team_rosters: Mapping of Baseball Reference player name to team id.
        players: DataFrame with at least the ``espn_name``, ``bref_name``
            and ``bref_id`` columns.

    Returns:
        A dict with the keys ``'batter_list'`` and ``'pitcher_list'``, each
        holding the ``to_dict()`` output of the scraped ``Player`` objects.

    Side effects:
        Marks every roster name that was matched in the module-level
        ``TEAM_ROSTER_NAMES_FOUND`` and prints one progress line per player.
    """
    pitcher_list = []
    batter_list = []
    for i, row in players.iterrows():
        # Nothing to scrape without a Baseball Reference id.
        if row['bref_id'] == '':
            continue

        bref_name = row['bref_name']
        team_id = team_rosters.get(bref_name)
        if team_id is not None:
            TEAM_ROSTER_NAMES_FOUND[bref_name] = True
        else:
            # Fall back to the alternate spelling used by the roster data.
            translation = TRANSLATIONS.get(bref_name)
            if translation is None:
                continue
            # NOTE(review): if the translated name is not on any roster,
            # team_id stays None and the player is still scraped -- this
            # matches the previous behaviour; confirm it is intended.
            team_id = team_rosters.get(translation)
            TEAM_ROSTER_NAMES_FOUND[translation] = True

        # A recorded trade overrides the roster's team. Previously this value
        # was computed but never written to the row, so it had no effect.
        traded_team_id = TRADES.get(bref_name)
        if traded_team_id is not None:
            team_id = traded_team_id

        # iterrows() may yield a copy or a view depending on dtypes, so write
        # to an explicit copy instead of the caller's DataFrame.
        player = row.copy()
        player['team_id'] = team_id

        print(f'Scraping Player: {bref_name} index: {i}\n\n')

        current_player = Player(player)
        if env['MY_PLAYERS'].get(player['espn_name']) is not None:
            current_player.mine = True

        # Baseball Reference
        br_url = env['BR_URL'] + current_player.bref_id
        if not current_player.batter:
            # Pitchers need the pitching variant of the page.
            br_url = br_url + '&t=p'

        driver.get(br_url)
        for table_id in ('total', 'plato'):
            current_player = get_stats(
                driver, 'BR', f'//*[@id="{table_id}"]/tbody/tr', './/th[1]',
                current_player)
        if not current_player.batter:
            # This extra table is only read for pitchers.
            current_player = get_stats(
                driver, 'BR', '//*[@id="total_extra"]/tbody/tr', './/th[1]',
                current_player)

        if current_player.batter:
            batter_list.append(current_player.to_dict())
        else:
            pitcher_list.append(current_player.to_dict())

    return {
        'batter_list': batter_list,
        'pitcher_list': pitcher_list,
    }