def get_full_name():
    """Open a player entry from the stats grid and return the heading shown.

    Clicks the cell at index 1 among the ``<td>`` cells of the ``datagrid``
    table body, calls ``random_delay()``, then parses the ``player-vitals``
    element and reads its ``<h1>`` text. Uses the module-level ``driver``.

    Returns:
        The text of the last ``<h1>`` inside ``player-vitals``, or ``None``
        when that element contains no ``<h1>``.
    """
    data_div = driver.find_element_by_id('datagrid')
    # ActionChains can only act on live WebElements; tags parsed out of
    # innerHTML by BeautifulSoup are detached copies and cannot be clicked.
    cells = data_div.find_elements_by_css_selector('tbody td')
    if len(cells) > 1:
        # NOTE(review): index 1 is presumably the player-name cell of the
        # first row -- confirm against the page markup.
        ActionChains(driver).move_to_element(cells[1]).click().perform()
        random_delay()

    vitals_div = driver.find_element_by_id("player-vitals")
    vitals_html = vitals_div.get_attribute('innerHTML')
    soup = bs4.BeautifulSoup(vitals_html, "html5lib")

    # Keep the last heading, as the original loop did, but start from None so
    # a page without any <h1> does not raise UnboundLocalError.
    player_name = None
    for heading in soup.find_all('h1'):
        player_name = heading.text
    return player_name
def select_HR():
    """Click the column header at index 10 of the ``datagrid`` table.

    Uses the module-level ``driver``. Does nothing when the table head has
    fewer than eleven ``<th>`` cells.
    """
    data_div = driver.find_element_by_id('datagrid')
    # Look the headers up through Selenium: ActionChains needs WebElements,
    # not BeautifulSoup tags.
    headers = data_div.find_elements_by_css_selector('thead th')
    if len(headers) > 10:
        # NOTE(review): index 10 is presumably the HR column -- confirm.
        # perform() must be *called*; a bare ".perform" never runs the click.
        ActionChains(driver).move_to_element(headers[10]).click().perform()
        random_delay()
def select_teams():
    """Click the ``<li id="st_parent">`` item inside the ``top_nav`` element.

    Uses the module-level ``driver``. Does nothing when no such item exists.
    """
    top_nav = driver.find_element_by_id("top_nav")
    # Query live WebElements (ActionChains cannot click BeautifulSoup tags)
    # and compare the real id attribute rather than a child tag named "id".
    for item in top_nav.find_elements_by_css_selector('ul li'):
        if item.get_attribute('id') == "st_parent":
            # perform() has to be called for the queued click to execute.
            ActionChains(driver).move_to_element(item).click().perform()
            random_delay()
def select_NL():
    """Click the control whose ``value`` attribute is ``"NL"``.

    Searches the fieldsets of the ``sp_hitting-1`` element using the
    module-level ``driver``. Does nothing when no element matches.
    """
    container = driver.find_element_by_id("sp_hitting-1")
    # Match a fieldset carrying value="NL" (the criterion as originally
    # written) or any element inside a fieldset carrying it.
    # NOTE(review): the target is presumably an <input> within the fieldset --
    # confirm against the page markup and narrow the selector if needed.
    selector = 'fieldset[value="NL"], fieldset [value="NL"]'
    for control in container.find_elements_by_css_selector(selector):
        # Live WebElement + perform() called, so the click really happens.
        ActionChains(driver).move_to_element(control).click().perform()
        random_delay()
def select_first_inning():
    """Open the hitting-splits control and click its 'First Inning' entry.

    Within the ``sp_hitting-1`` element, each control whose ``value``
    attribute is ``"sp_hitting_hitting_splits"`` is clicked, then the element
    returned by ``select_element_by_text(control, 'First Inning')`` is
    clicked. ``random_delay()`` is called between the steps. Uses the
    module-level ``driver``.
    """
    container = driver.find_element_by_id("sp_hitting-1")
    # Match a fieldset with this value, or any element inside a fieldset.
    # NOTE(review): "sp_hitting_hitting_splits" reads like an id/name rather
    # than a value -- confirm which attribute the page actually uses.
    selector = (
        'fieldset[value="sp_hitting_hitting_splits"], '
        'fieldset [value="sp_hitting_hitting_splits"]'
    )
    for control in container.find_elements_by_css_selector(selector):
        # perform() must be called, and on a WebElement, for the click to run.
        ActionChains(driver).move_to_element(control).click().perform()
        random_delay()
        # NOTE(review): select_element_by_text is defined elsewhere; it is
        # assumed to accept a WebElement and return a clickable element.
        first_inning = select_element_by_text(control, 'First Inning')
        random_delay()
        ActionChains(driver).move_to_element(first_inning).click().perform()
        random_delay()
def scrape_all(driver):
    """Extract the stats grid from every page, following the 'next' button.

    On each page ``extract_stats_data(driver)`` is called first; then the
    last child of the ``<fieldset>`` inside the ``pagination`` element is
    inspected. If it is marked ``paginationWidget-next`` (as a class token or
    as its id) and is displayed and enabled, it is clicked and the loop
    continues; otherwise scraping stops.

    Args:
        driver: Selenium WebDriver positioned on the first results page.

    Returns:
        A list holding the value returned by ``extract_stats_data`` for each
        page visited, in visiting order.
    """
    pages = []
    while True:
        pages.append(extract_stats_data(driver))
        random_delay()

        # Re-read the pagination widget on every pass: it changes after each
        # click, and elements found before a page change go stale.
        pagination_div = driver.find_element_by_id("pagination")
        buttons = pagination_div.find_elements_by_css_selector('fieldset > *')
        if not buttons:
            break

        last_button = buttons[-1]
        # NOTE(review): whether "paginationWidget-next" is a class or an id is
        # not visible here, so both are checked -- confirm against the page.
        markers = (last_button.get_attribute('class') or '').split()
        markers.append(last_button.get_attribute('id'))
        if "paginationWidget-next" not in markers:
            break
        # A hidden or disabled "next" button cannot be followed; stopping here
        # also prevents looping forever on the last page.
        if not (last_button.is_displayed() and last_button.is_enabled()):
            break

        ActionChains(driver).move_to_element(last_button).click().perform()
        random_delay()
    return pages
def select_all_star():
    """Choose the ``preas`` option of the 'Time Frame' option group.

    Within the ``sp_hitting-1`` element, finds the ``<optgroup>`` titled
    ``"Time Frame"``, clicks its ``<option value="preas">``, then clicks the
    element returned by ``select_element_by_text(group, 'Pre All-Star')``.
    ``random_delay()`` is called between the steps. Uses the module-level
    ``driver``.
    """
    container = driver.find_element_by_id("sp_hitting-1")
    for group in container.find_elements_by_css_selector('fieldset optgroup'):
        # An <optgroup> carries its title in the "label" attribute; "value"
        # is also checked because that is what the original code compared.
        titles = (group.get_attribute('label'), group.get_attribute('value'))
        if "Time Frame" not in titles:
            continue
        for option in group.find_elements_by_tag_name('option'):
            if option.get_attribute('value') != "preas":
                continue
            # perform() must be called, on a WebElement, for the click to run.
            ActionChains(driver).move_to_element(option).click().perform()
            random_delay()
            # NOTE(review): the original passed an undefined name ("label")
            # here. The matched option group is used instead, assuming
            # select_element_by_text searches inside the element it is
            # given -- confirm against that helper.
            pre_all_star = select_element_by_text(group, 'Pre All-Star')
            random_delay()
            ActionChains(driver).move_to_element(pre_all_star).click().perform()
            random_delay()
def extract_stats_data(driver):
    """Read the ``datagrid`` table on the current page into a DataFrame.

    The element's ``innerHTML`` is parsed with BeautifulSoup. Header text
    comes from the ``<th>`` cells of the table head and each ``<tr>`` of the
    table body becomes one row. The sort-indicator characters ``▲`` and ``▼``
    are removed from every cell.

    Args:
        driver: Selenium WebDriver showing the page that holds the grid.

    Returns:
        A ``pandas.DataFrame`` with one row per table-body row and one column
        per header cell.

    Raises:
        ValueError: Raised by pandas when a row's cell count does not match
            the number of header cells.
    """
    data_div = driver.find_element_by_id('datagrid')
    data_html = data_div.get_attribute('innerHTML')
    soup = bs4.BeautifulSoup(data_html, "html5lib")

    # One C-level pass that deletes both arrow characters from a string.
    strip_arrows = str.maketrans('', '', '▲▼')

    header_cells = soup.thead.find_all('th') if soup.thead else []
    row_head = [cell.text.translate(strip_arrows) for cell in header_cells]

    # Group cells by their <tr>; a flat list of cells cannot be laid out
    # under several columns.
    body_rows = soup.tbody.find_all('tr') if soup.tbody else []
    row_data = []
    for row in body_rows:
        cells = [cell.text.translate(strip_arrows) for cell in row.find_all('td')]
        if cells:
            row_data.append(cells)

    # Parsing happens on an in-memory string, so call random_delay() once per
    # page instead of once per cell.
    random_delay()

    # "or None" lets pandas pick default column labels if no header was found.
    data = pd.DataFrame(row_data, columns=row_head or None)
    return data
# NOTE(review): the header-less statements that preceded get_local() on this
# line duplicated the tail of get_full_name() and contained a ``return``
# outside any function, so they were removed.
def get_local(name):
    """Click a player element and return that player's 'Born' entry.

    Args:
        name: Element to click; it is handed to
            ``ActionChains.move_to_element``, so it must be a live WebElement.

    Returns:
        The text of the first ``<li>`` inside ``player-vitals`` whose ``id``
        attribute is ``'Born'``, or ``None`` when there is no such item.
    """
    ActionChains(driver).move_to_element(name).click().perform()
    random_delay()

    vitals_div = driver.find_element_by_id("player-vitals")
    vitals_html = vitals_div.get_attribute('innerHTML')
    soup = bs4.BeautifulSoup(vitals_html, "html5lib")

    for item in soup.find_all('li'):
        # Tag.get() reads the HTML attribute; attribute-style access
        # (item.id) would look for a child tag named "id" instead.
        # NOTE(review): confirm the page marks this entry with id="Born"
        # rather than with its visible text.
        if item.get('id') == 'Born':
            return item.text
    return None


def answer_question_four():
    """Run ``scrape_all`` with the module-level ``driver``.

    Returns:
        Whatever ``scrape_all(driver)`` returns.
    """
    # scrape_all requires the driver argument; calling it bare raises TypeError.
    return scrape_all(driver)
def extract_stats_data(driver): data_div = driver.find_element_by_id('datagrid') data_html = data_div.get_attribute('innerHTML') soup = bs4.BeasutifulSoup(data_html, "html5lib")