def get_upcoming_fight_stats(
        event_and_fight_links: Dict[str, List[str]]) -> str:
    """Scrape every upcoming fight and return all records as one string.

    Args:
        event_and_fight_links: Mapping whose keys are event links and whose
            values are the fight links of that event. Each link is handed
            to ``make_soup``.

    Returns:
        One record per successfully scraped fight, joined with newlines.
        A record is ``<fight details>;<event info>``. Returns ``''`` when
        no fight could be scraped.
    """
    records = []
    total = len(event_and_fight_links)
    print('Scraping upcoming fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, (event, fights) in enumerate(event_and_fight_links.items()):
        # Event info is fetched once and shared by every fight of the event.
        event_info = get_event_info(make_soup(event))

        for fight in fights:
            try:
                fight_details = get_upcoming_fight_details(make_soup(fight))
            except Exception:
                # Best-effort scrape: a fight whose page cannot be fetched
                # or parsed is skipped so the rest of the run continues.
                continue
            records.append(fight_details + ';' + event_info)

        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # Joining once avoids rebuilding the whole result string per fight.
    return '\n'.join(records)
def get_fighter_name_and_details(
        fighter_name_and_link: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Scrape the detail list items from each fighter's page.

    Args:
        fighter_name_and_link: Mapping of fighter name to the link of that
            fighter's page; each value is passed to ``make_soup``.

    Returns:
        Mapping of fighter name to the cleaned text of the first five
        matching ``<li>`` items on the page (fewer if the page has fewer).
    """
    # Substrings removed from each item's text, applied in this order.
    strip_tokens = (' ', '\n', 'Height:', 'Weight:', 'Reach:', 'STANCE:', 'DOB:')
    item_filter = {
        'class': "b-list__box-list-item b-list__box-list-item_type_block"
    }

    total = len(fighter_name_and_link)
    print('Scraping all fighter data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    details_by_name = {}
    for done, (fighter_name, fighter_url) in enumerate(
            fighter_name_and_link.items(), start=1):
        page = make_soup(fighter_url)

        cleaned = []
        # Only the first five matching list items are kept.
        for item in page.findAll('li', item_filter)[:5]:
            text = item.text
            for token in strip_tokens:
                text = text.replace(token, '')
            cleaned.append(text)

        details_by_name[fighter_name] = cleaned
        print_progress(done, total, prefix='Progress:', suffix='Complete')

    return details_by_name
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Scrape fighter names and their page links from the listing pages.

    The matching anchors inside each page's ``<tbody>`` are consumed in
    groups of three: the text of the first two is joined with a space to
    form the name, and the ``href`` of the third is stored as the link.

    NOTE(review): a second function with this same name is defined later
    in this source; if both live in one module, the later one replaces
    this one at import time.

    Args:
        fighter_group_urls: Links of the listing pages, each passed to
            ``make_soup``.

    Returns:
        Mapping of fighter name to the ``href`` of the third anchor of
        that fighter's group.
    """
    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    links_by_name = {}
    # Not reset between pages: a partially built name carries over if a
    # page's anchor count is not a multiple of three.
    pending_name = ''

    for done, group_url in enumerate(fighter_group_urls, start=1):
        body = make_soup(group_url).find('tbody')
        anchors = body.findAll(
            'a', {'class': 'b-link b-link_style_black'}, href=True)

        for position, anchor in enumerate(anchors, start=1):
            if position % 3 == 0:
                # Third anchor of the group closes the entry.
                links_by_name[pending_name] = anchor['href']
                pending_name = ''
            elif pending_name:
                pending_name = pending_name + ' ' + anchor.text
            else:
                pending_name = anchor.text

        print_progress(done, total, prefix='Progress:', suffix='Complete')

    return links_by_name
def get_link_of_upcoming_events_no_pickle(
        upcoming_events_url: str = UPCOMING_EVENTS_URL) -> List[str]:
    """Collect the event links from the upcoming-events page.

    Nothing is written to disk by this function.

    Args:
        upcoming_events_url: Link of the page to scrape; passed to
            ``make_soup``.

    Returns:
        The ``href`` of every ``<a>`` found inside a
        ``<td class="b-statistics__table-col">`` cell, in page order.
    """
    soup = make_soup(upcoming_events_url)
    return [
        anchor.get('href')
        for cell in soup.findAll('td', {'class': 'b-statistics__table-col'})
        for anchor in cell.findAll('a')
    ]
def get_all_odds(odds_url: str = UPCOMING_ODDS_URL) -> List[str]:
    """Collect every link found in the odds page's content wrappers.

    Args:
        odds_url: Link of the odds page to scrape; passed to
            ``make_soup``. Defaults to ``UPCOMING_ODDS_URL``.

    Returns:
        The ``href`` of every ``<a>`` found inside a
        ``<div class="op-content-wrapper">`` element, in page order.
    """
    # Scrape the page the caller asked for rather than always the default.
    soup = make_soup(odds_url)
    return [
        anchor.get('href')
        for wrapper in soup.findAll('div', {'class': 'op-content-wrapper'})
        for anchor in wrapper.findAll('a')
    ]
def get_event_and_fight_links_no_pickle(
        event_links: List[str]) -> Dict[str, List[str]]:
    """Map each event link to the fight links listed on that event page.

    Nothing is written to disk by this function.

    Args:
        event_links: Event page links, each passed to ``make_soup``.

    Returns:
        Mapping of event link to the ``data-link`` attribute of every
        matching fight row on that event's page, in page order.
    """
    row_filter = {
        'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'
    }
    return {
        event_link: [
            row.get('data-link')
            for row in make_soup(event_link).findAll('tr', row_filter)
        ]
        for event_link in event_links
    }
def get_link_of_past_events(
        all_events_url: str = ALL_EVENTS_URL) -> List[str]:
    """Collect the event links from the events page and pickle them.

    Args:
        all_events_url: Link of the page to scrape; passed to
            ``make_soup``.

    Returns:
        The ``href`` of every ``<a>`` found inside a
        ``<td class="b-statistics__table-col">`` cell, in page order.
        The same list is also pickled to ``PAST_EVENT_LINKS_PATH``,
        overwriting any existing file.
    """
    soup = make_soup(all_events_url)
    links = [
        anchor.get('href')
        for cell in soup.findAll('td', {'class': 'b-statistics__table-col'})
        for anchor in cell.findAll('a')
    ]

    # The context manager closes the file even if pickling fails.
    with open(PAST_EVENT_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(links, pickle_out)

    return links
def get_event_and_fight_links(
        event_links: List[str]) -> Dict[str, List[str]]:
    """Map each event link to its fight links and pickle the mapping.

    Args:
        event_links: Event page links, each passed to ``make_soup``.

    Returns:
        Mapping of event link to the ``data-link`` attribute of every
        matching fight row on that event's page, in page order. The same
        mapping is also pickled to ``EVENT_AND_FIGHT_LINKS_PATH``,
        overwriting any existing file.
    """
    row_filter = {
        'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'
    }

    event_and_fight_links = {}
    for link in event_links:
        soup = make_soup(link)
        event_and_fight_links[link] = [
            row.get('data-link') for row in soup.findAll('tr', row_filter)
        ]

    # The context manager closes the file even if pickling fails.
    with open(EVENT_AND_FIGHT_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(event_and_fight_links, pickle_out)

    return event_and_fight_links
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Scrape fighter names and links, returning only unseen fighters.

    The matching anchors inside each page's ``<tbody>`` are consumed in
    groups of three: the text of the first two is joined with a space to
    form the name, and the ``href`` of the third is stored as the link.
    The scraped links are then compared with the list pickled at
    ``PAST_FIGHTER_LINKS_PATH``, and that file is overwritten with the
    newly scraped list.

    NOTE(review): an earlier function with this same name appears in this
    source; if both live in one module, this later definition wins.

    Args:
        fighter_group_urls: Links of the listing pages, each passed to
            ``make_soup``.

    Returns:
        Mapping of fighter name to link, restricted to links that were
        not present in the previously pickled list.

    Raises:
        FileNotFoundError: If ``PAST_FIGHTER_LINKS_PATH`` does not exist.
    """
    fighter_name_and_link = {}
    # Not reset between pages: a partially built name carries over if a
    # page's anchor count is not a multiple of three.
    fighter_name = ''

    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, fighter_group_url in enumerate(fighter_group_urls):
        soup = make_soup(fighter_group_url)
        table = soup.find('tbody')
        names = table.findAll(
            'a', {'class': 'b-link b-link_style_black'}, href=True)

        for i, name in enumerate(names):
            if (i + 1) % 3 != 0:
                if fighter_name == '':
                    fighter_name = name.text
                else:
                    fighter_name = fighter_name + ' ' + name.text
            else:
                # Third anchor of the group closes the entry.
                fighter_name_and_link[fighter_name] = name['href']
                fighter_name = ''

        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # NOTE: pickle.load can execute arbitrary code from the file, so this
    # path must only ever point at a file written by this project.
    with open(PAST_FIGHTER_LINKS_PATH.as_posix(), "rb") as pickle_in:
        # A set gives constant-time membership tests in the filter below.
        past_fighter_links = set(pickle.load(pickle_in))

    new_fighter_links = list(fighter_name_and_link.values())

    # Set the stored fighter links to the newly scraped list.
    with open(PAST_FIGHTER_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(new_fighter_links, pickle_out)

    return {
        name: link
        for name, link in fighter_name_and_link.items()
        if link not in past_fighter_links
    }
def get_fight(event_and_fight_links: Dict[str, List[str]]) -> Dict:
    """Scrape every fight page and merge the per-fight mappings into one.

    Each fight link is passed to ``make_soup`` and the resulting soup to
    ``get_fighter_name_and_link``, which is expected to return a mapping
    (its ``.items()`` is iterated). When the same key appears in more
    than one fight, the first value seen is kept.

    Args:
        event_and_fight_links: Mapping whose values are lists of fight
            links; the keys are not used here.

    Returns:
        The merged mapping. The return annotation is ``Dict`` because a
        dict, not a string, is returned.
    """
    links_dict = {}
    total = len(event_and_fight_links)
    print('Scraping all fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')

    for index, fights in enumerate(event_and_fight_links.values()):
        for fight in fights:
            fight_details = get_fighter_name_and_link(make_soup(fight))
            for key, value in fight_details.items():
                # First value wins; later duplicates of a key are ignored
                # instead of being appended to an already stored value.
                links_dict.setdefault(key, value)

        # Kept from the original implementation: show the merged mapping
        # collected so far before advancing the progress display.
        print(links_dict)
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    return links_dict