예제 #1
0
def get_gameIds():
    one_day = timedelta(days=1)
    espn_start_date = date(2019, 1, 11)
    #end_date = date(2002, 10, 6)
    end_date = date(2019, 1, 11)
    today = espn_start_date

    espn = "http://www.espn.com"
    day_url = "{0}/nba/scoreboard/_/date/".format(espn)
    driver = webdriver.Chrome()
    gameId_regex = "^.+gameId=([0-9]+).*"
    games_file = open(
        "{0}/data/nba_gameIds.txt".format(
            my.up_x_dir(my.get_script_directory(), 1)), 'w')

    while (today >= end_date):
        today_url = "{0}{1}".format(day_url, today.strftime("%Y%m%d"))
        driver.get(today_url)
        innerHTML = driver.execute_script("return document.body.innerHTML")
        #print(type(innerHTML))
        soup = my.get_soup_str(innerHTML)
        for link in soup.find_all('a'):
            href = link.get('href')
            if isinstance(href, str):
                if "boxscore" in href and "nba" in href:
                    m = re.match(gameId_regex, href)
                    if m:
                        gameId = m.group(1)
                    games_file.write("{0},{1}\n".format(
                        gameId, today.strftime("%Y%m%d")))
        games_file.flush()
        print(today.strftime("%Y%m%d"))
        today -= one_day
예제 #2
0
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
    }

    driver.get(url)
    innerHTML = driver.execute_script("return document.body.innerHTML")

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(response.status_code)
        print("Invalid Status Code")
        exit(1)

    soup = my.get_soup_str(innerHTML)

    print(soup)
    while another_page:
        for row in soup.find_all('li', {'class': 'item building'}):
            print("here")
            print(row['id'])

        another_page = False
'''
players = []
for row in soup.find_all('div', class_ = 'scorer-7__body'):
    player = []
    player.append(row.find('div', class_ = 'scorer-7__player').text)
    for line in row.find_all('div', class_ = 'sportsbook-outcome-cell no-label'):
        player.append(line.text)
예제 #3
0
def get_games():
    test_string = "400899980,20170107"
    espn = "http://www.espn.com"
    sport = "nba"

    driver = webdriver.Chrome()
    games_file = open(
        "{0}/data/{1}_game_info.txt".format(
            my.up_x_dir(my.get_script_directory(), 1), sport), 'w')
    error_file = open(
        "{0}/data/{1}_ERROR_game_info.txt".format(
            my.up_x_dir(my.get_script_directory(), 1), sport), 'w')

    with open("{0}/data/{1}_gameIds.txt".format(
            my.up_x_dir(my.get_script_directory(), 1), sport)) as f:
        for line in f:
            try:
                game = line.split(',')[0]
                date = line.split(',')[1].rstrip()
                #game = test_string.split(',')[0]
                #date = test_string.split(',')[1]

                print("{0}:{1}".format(game, date))

                url = "{0}/{1}/boxscore?gameId={2}".format(espn, sport, game)
                driver.get(url)
                innerHTML = driver.execute_script(
                    "return document.body.innerHTML")
                soup = my.get_soup_str(innerHTML)
                teams = []
                for team in soup.find_all("span", {"class": "abbrev"}):
                    teams.append(team.string)
                runs = [["one", "away", teams[0]], ["two", "home", teams[1]]]
                name_regex = "^.+\/(.+)\/(.+)$"
                hyphen_regex = "^([0-9]+)-([0-9]+)$"
                header = "gameId,starter,playerId,playerName,playerAbbr,position,team,minutes,fgm,fga,3ptm,3pta,ftm,fta,oreb,dreb,reb,ast,stl,blk,to,pf,plusminus,pts"
                for run in runs:
                    for block in soup.find_all(
                            "div", {
                                "class":
                                "col column-{0} gamepackage-{1}-wrap".format(
                                    run[0], run[1])
                            }):
                        table = block.find('table')
                        table_rows = table.find_all('tr')
                        start = 0
                        for tr in table_rows:
                            if tr.has_attr("class"):
                                if tr.get("class")[0] == "highlight":
                                    continue
                            td = tr.find_all('td')
                            if tr.find('th') is not None:
                                starter = tr.find('th')
                                if tr.find('th').text == "Starters":
                                    start = 1
                                if tr.find('th').text == "Bench":
                                    start = 0
                                continue
                            row = [game, start]
                            for i in td:
                                if i.get("class")[0] == 'name':
                                    a = i.find('a')
                                    abbv = a.find("span", {"class", "abbr"})
                                    href = a.get('href')
                                    if isinstance(href, str):
                                        m = re.match(name_regex, href)
                                        if m:
                                            row.append(m.group(1))
                                            row.append(
                                                m.group(2).replace('-', ' '))

                                    row.append(abbv.text)
                                    for span in i.find_all("span"):
                                        if span.has_attr("class"):
                                            if span.get(
                                                    "class")[0] == "position":
                                                row.append(span.text)
                                    #row.append(i.get("class")[0].find("span", {"class", "position"})).text
                                    row.append(run[2])
                                else:
                                    m = re.match(hyphen_regex, i.text)
                                    if m:
                                        row.extend((m.group(1), m.group(2)))
                                    else:
                                        row.append(i.text)
                            print(','.join(str(x) for x in row))
                            games_file.write("{0}\n".format(','.join(
                                str(x) for x in row)))
            except:
                error_file.write("{1},{0}\n".format(date, game))
            print("Completed - {0},{1}".format(date, game))
    driver.close()