def get_fighter_name_and_details(
        fighter_name_and_link: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Scrape per-fighter detail fields for every fighter link.

    For each fighter page, collects the first five stat boxes (height,
    weight, reach, stance, DOB), stripped of whitespace and field labels.

    :param fighter_name_and_link: mapping of fighter name -> fighter page URL
    :return: mapping of fighter name -> list of five cleaned detail strings
    """
    details_by_fighter = {}
    total = len(fighter_name_and_link)
    # Whitespace is removed first, so the remaining text is "<label><value>"
    # and each label can then be stripped as a plain substring.
    labels = ('Height:', 'Weight:', 'Reach:', 'STANCE:', 'DOB:')
    strip_ws = str.maketrans('', '', ' \n')

    print('Scraping all fighter data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')
    for done, (name, url) in enumerate(fighter_name_and_link.items(), start=1):
        page = make_soup(url)
        boxes = page.findAll(
            'li',
            {'class': "b-list__box-list-item b-list__box-list-item_type_block"})
        cleaned = []
        # Only the first five boxes carry the stats of interest.
        for box in boxes[:5]:
            text = box.text.translate(strip_ws)
            for label in labels:
                text = text.replace(label, '')
            cleaned.append(text)
        details_by_fighter[name] = cleaned
        print_progress(done, total, prefix='Progress:', suffix='Complete')
    return details_by_fighter
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Collect fighter names and their profile links from listing pages.

    Anchors on the listing pages come in groups of three: two anchors with
    name fragments followed by one anchor carrying the profile href.

    :param fighter_group_urls: listing-page URLs to scrape
    :return: mapping of fighter name -> profile link
    """
    name_to_link = {}
    pending_parts: List[str] = []
    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')
    for done, group_url in enumerate(fighter_group_urls, start=1):
        body = make_soup(group_url).find('tbody')
        anchors = body.findAll('a', {'class': 'b-link b-link_style_black'},
                               href=True)
        for pos, anchor in enumerate(anchors):
            if pos % 3 == 2:
                # Every third anchor holds the link for the accumulated name.
                name_to_link[' '.join(pending_parts)] = anchor['href']
                pending_parts = []
            else:
                pending_parts.append(anchor.text)
        print_progress(done, total, prefix='Progress:', suffix='Complete')
    return name_to_link
def get_upcoming_fight_stats(
        event_and_fight_links: Dict[str, List[str]]) -> str:
    """Scrape stats for every upcoming fight.

    :param event_and_fight_links: mapping of event URL -> list of fight URLs
    :return: newline-separated rows of '<fight details>;<event info>'
    """
    # Accumulate rows and join once — the original rebuilt the whole string
    # on every fight (quadratic).
    rows = []
    total = len(event_and_fight_links)
    print('Scraping upcoming fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')
    for index, (event, fights) in enumerate(event_and_fight_links.items()):
        event_info = get_event_info(make_soup(event))
        for fight in fights:
            try:
                fight_details = get_upcoming_fight_details(make_soup(fight))
            except Exception as e:
                # Best-effort: skip fights that fail to scrape/parse, but
                # surface the failure instead of swallowing it silently.
                print(f'Skipping fight {fight}: {e}')
                continue
            rows.append(fight_details + ';' + event_info)
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')
    return '\n'.join(rows)
def get_fighter_name_and_link(
        fighter_group_urls: List[str]) -> Dict[str, List[str]]:
    """Scrape fighter names/links and keep only fighters not seen before.

    Previously scraped links are loaded from ``PAST_FIGHTER_LINKS_PATH``;
    the freshly scraped full list is persisted back so the next run diffs
    against it.

    :param fighter_group_urls: listing-page URLs to scrape
    :return: mapping of fighter name -> profile link, new fighters only
    """
    fighter_name_and_link = {}
    fighter_name = ''
    total = len(fighter_group_urls)
    print('Scraping all fighter names and links: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')
    for index, fighter_group_url in enumerate(fighter_group_urls):
        soup = make_soup(fighter_group_url)
        table = soup.find('tbody')
        names = table.findAll('a', {'class': 'b-link b-link_style_black'},
                              href=True)
        # Anchors come in groups of three: two name fragments, then the link.
        for i, name in enumerate(names):
            if (i + 1) % 3 != 0:
                fighter_name = (name.text if fighter_name == ''
                                else fighter_name + ' ' + name.text)
            else:
                fighter_name_and_link[fighter_name] = name['href']
                fighter_name = ''
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # Context managers guarantee the pickle files are closed even if
    # load/dump raises — the original leaked the handles on exceptions.
    with open(PAST_FIGHTER_LINKS_PATH.as_posix(), "rb") as pickle_in:
        past_fighter_links = pickle.load(pickle_in)
    new_fighter_links = list(fighter_name_and_link.values())
    fresh_links = np.setdiff1d(new_fighter_links, past_fighter_links)

    # Persist the full newly scraped list for the next run's diff.
    with open(PAST_FIGHTER_LINKS_PATH.as_posix(), "wb") as pickle_out:
        pickle.dump(new_fighter_links, pickle_out)

    # Keep only fighters whose link was absent from the previous scrape.
    return {name: link
            for name, link in fighter_name_and_link.items()
            if link in fresh_links}
def run(self):
    """Scrape odds for every unique fighter and write a deduplicated CSV.

    Concatenates the per-fighter odds frames, drops rows describing the
    same matchup (fighter pair + both average odds, order-insensitive),
    and writes the result to ``self.Fightersfile``.
    """
    total = len(self.unique_fighters)
    print_progress(0, total, prefix='Progress:', suffix='Complete')
    frames = []
    for done, fighter in enumerate(self.unique_fighters, start=1):
        frames.append(self.retrive_odds(fighter))
        print_progress(done, total, prefix='Progress:', suffix='Complete')
    odds = pd.concat(frames)

    # Build an order-insensitive fingerprint of each matchup so the same
    # fight listed twice (fighters swapped) collapses to a single row.
    fingerprint = (odds.Fighter_one + odds.Fighter_two +
                   odds.Average_Odds_f2 + odds.Average_Odds_f1)
    odds['duplicates'] = fingerprint.apply(
        lambda s: ''.join(sorted(s.replace(" ", "").replace(".", ""))))
    odds.drop_duplicates(subset='duplicates', keep="first", inplace=True)
    odds.to_csv(self.Fightersfile, index=False)
def get_fight(event_and_fight_links: Dict[str, List[str]]) -> str:
    """Scrape fighter-name/link pairs from every fight of every event.

    :param event_and_fight_links: mapping of event URL -> list of fight URLs
    :return: mapping of fighter name -> first link seen for that fighter
        (NOTE: the ``-> str`` annotation is kept for interface
        compatibility, but the function returns a dict, as before)
    """
    per_fight_dicts = []
    links_dict: Dict[str, str] = {}
    total = len(event_and_fight_links)
    print('Scraping all fight data: ')
    print_progress(0, total, prefix='Progress:', suffix='Complete')
    for index, (event, fights) in enumerate(event_and_fight_links.items()):
        for fight in fights:
            fight_soup = make_soup(fight)
            per_fight_dicts.append(get_fighter_name_and_link(fight_soup))
        print_progress(index + 1, total, prefix='Progress:', suffix='Complete')

    # Aggregate once, after scraping. The original re-scanned the growing
    # list of dicts on every event and replaced each accumulated list with
    # its first element, so the next ``setdefault(k, []).append(v)`` raised
    # AttributeError as soon as a fighter name repeated. ``setdefault`` with
    # the link directly reproduces the intended first-occurrence-wins result,
    # and the per-iteration debug ``print(links_dict)`` is dropped.
    for fight_dict in per_fight_dicts:
        for name, link in fight_dict.items():
            links_dict.setdefault(name, link)
    return links_dict