def get_recruiting_timeline(player_id, player_profile_url, full_name):
    """Scrape a recruit's timeline events from their 247Sports profile.

    Args:
        player_id: 247Sports id, copied into each event dict as '247_id'.
        player_profile_url: base profile URL (no trailing slash).
        full_name: recruit's display name, used only in warning output.

    Returns:
        List of dicts with keys '247_id', 'event_date', 'event_type'
        and 'event_description'.
    """
    recruiting_profile_html = http_get(f'{player_profile_url}/TimelineEvents')
    base_page = BeautifulSoup(recruiting_profile_html, 'html.parser')

    number_of_timeline_pages = 1
    try:
        # The second-to-last pagination link carries the last page number.
        number_of_timeline_pages = int(
            base_page.find_all('a', class_='pagn_link')[-2].text)
    except (IndexError, ValueError, AttributeError):
        # Single-page timeline or unexpected markup: fall back to one page.
        print(emoji.emojize(f':warning: Couldn\'t parse pagination for {full_name}', use_aliases=True))

    event_output_list = []
    timeline_ul = base_page.find('ul', class_='timeline-event-index_lst')
    timeline_lis = timeline_ul.find_all('li')
    # NOTE(review): page 0 reuses the already-fetched base page; subsequent
    # iterations fetch ?page=1 .. ?page=n-1. If the site treats ?page=1 as
    # the same content as the base page, this re-reads page 1 and never
    # reads the last page — confirm against live pagination before changing.
    for page in range(0, number_of_timeline_pages):
        if page != 0:
            recruiting_profile_html = http_get(
                f'{player_profile_url}/TimelineEvents/?page={page}')
            base_page = BeautifulSoup(recruiting_profile_html, 'html.parser')
            timeline_ul = base_page.find('ul', class_='timeline-event-index_lst')
            timeline_lis = timeline_ul.find_all('li')
        for list_item in timeline_lis:
            event_output = {'247_id': player_id}
            # Bold text looks like "<word-month date>: <event type>".
            date_and_event_split = list_item.find('b').text.split(': ')
            event_output['event_date'] = convert_to_year_month_day_from_word_month(
                date_and_event_split[0])
            event_output['event_type'] = date_and_event_split[1]
            paragraph_tags = list_item.find_all('p')
            # The second <p> holds the event description text.
            event_output['event_description'] = paragraph_tags[1].text
            event_output_list.append(event_output)
    return event_output_list
def get_ranking_history_url(player_profile_url, full_name):
    """Return the href of the recruit's ranking-history link.

    Prints a warning and implicitly returns None when the link is
    missing or the markup is unexpected.

    Args:
        player_profile_url: recruit profile URL to fetch.
        full_name: recruit's display name, used only in the warning.
    """
    html = http_get(player_profile_url)
    base_page = BeautifulSoup(html, 'html.parser')
    try:
        # find() returns None when the anchor is absent, so subscripting
        # raises TypeError; a present anchor without 'href' raises KeyError.
        return base_page.find('a', class_='rank-history-link')['href']
    except (TypeError, KeyError):
        print(emoji.emojize(f':thumbsdown: Error parsing recruiting ranking history page for {full_name}', use_aliases=True))
def get_number_of_pages_for_year(year, recruits_per_page):
    """Compute how many ranking pages exist for a recruiting class year.

    Fetches the composite rankings page, reads the total recruit count
    from its count badge (rendered like " (1234)"), and rounds up to
    whole pages of ``recruits_per_page`` each.
    """
    rankings_url = f'https://247sports.com/Season/{year}-Football/CompositeRecruitRankings/'
    page_soup = BeautifulSoup(http_get(rankings_url), 'html.parser')
    total_recruits = int(page_soup.find('span', class_='count').text.strip(' ()'))
    return math.ceil(total_recruits / recruits_per_page)
def get_recruiting_ranking_history(player_id, recruiting_ranking_url, full_name):
    """Scrape a recruit's composite ranking-change history.

    Args:
        player_id: 247Sports id, copied into each row as '247_id'.
        recruiting_ranking_url: ranking-history page URL to fetch.
        full_name: recruit's display name, used only in warning output.

    Returns:
        List of dicts with keys '247_id', 'rating', 'rank',
        'change_date', 'delta', 'delta_inception'. Rows that fail to
        parse are skipped with a printed warning.
    """
    recruiting_ranking_html = http_get(recruiting_ranking_url)
    base_page = BeautifulSoup(recruiting_ranking_html, 'html.parser')
    recruiting_ranking_ul = base_page.find('ul', class_='ranking-history-list')
    # class_='' filters to <li> elements with an empty class attribute.
    recruiting_ranking_lis = recruiting_ranking_ul.find_all('li', class_='')
    ranking_history_output = []
    for list_item in recruiting_ranking_lis:
        try:
            try:
                rank_text = list_item.find('span', class_='rank').text
                # '-' means no rank shown; stored as 0.0.
                rank_value = float(rank_text) if rank_text != '-' else 0.0
            except (AttributeError, ValueError):
                # Missing or unparseable rank span: sentinel -1.0.
                rank_value = -1.0
            rating_text = list_item.find('span', class_='rating').text
            rating_value = float(rating_text) if rating_text != '-' else 0.0
            delta_element = list_item.find('span', class_='last')
            delta_value = float(delta_element.text) if delta_element.text != '-' else 0.0
            # A 'red' CSS class marks a drop, so negate the magnitude.
            delta_value = delta_value * -1.0 if 'red' in delta_element['class'] else delta_value
            delta_inception_element = list_item.find('span', class_='inception')
            delta_inception_value = float(delta_inception_element.text) if delta_inception_element.text != '-' else 0.0
            delta_inception_value = delta_inception_value * -1.0 if 'red' in delta_inception_element['class'] else delta_inception_value
            ranking_history_output.append({
                '247_id': player_id,
                'rating': rating_value,
                'rank': rank_value,
                'change_date': convert_to_year_month_day_from_number_month(
                    list_item.find('span', class_='change-date').text),
                'delta': delta_value,
                'delta_inception': delta_inception_value
            })
        except Exception:
            # Best-effort scrape: skip this row but keep the rest.
            print(emoji.emojize(f':thumbsdown: Error parsing ranking history for {full_name}', use_aliases=True))
    return ranking_history_output
def parse_page_of_recruits(url, recruit_list, year):
    """Parse one composite-rankings page and append recruits in place.

    For each ranked recruit on the page, appends a dict with id/url/name,
    year, position, high school and hometown, score, star count, and
    height/weight. Unparseable optional fields fall back to '' / 0.0
    with a printed warning.

    Args:
        url: rankings page URL to fetch.
        recruit_list: list mutated in place; one dict appended per recruit.
        year: recruiting class year, copied into each dict.
    """
    html = http_get(url)
    base_page = BeautifulSoup(html, 'html.parser')
    rankings_page_div = base_page.find('div', class_='rankings-page__main')
    list_items = rankings_page_div.find_all('li', class_='rankings-page__list-item')
    for list_item in list_items:
        name_link = list_item.find('a', class_='rankings-page__name-link')
        if name_link is None:
            # Non-recruit rows (ads/section headers) have no name link.
            continue
        recruit = {
            # Profile href ends with "...-<numeric id>".
            '247_id': name_link['href'].split('-')[-1],
            '247_url': f'https://247sports.com{name_link["href"]}',
            'full_name': name_link.text,
            'year': year,
            'position': list_item.find('div', class_='position').text.strip()
        }
        meta_span_text = list_item.find('span', class_='meta').text
        try:
            # Meta text looks like 'High School Name (City, ST)'.
            high_school_and_hometown = re.findall(
                r'^([\w|\W+\s?]+)\((.*?)\)', meta_span_text)[0]
            recruit['high_school'] = high_school_and_hometown[0].strip()
            hometown_split = high_school_and_hometown[1].split(', ')
            recruit['city'] = hometown_split[0]
            recruit['state'] = hometown_split[1]
        except IndexError:
            recruit['high_school'] = ''
            recruit['city'] = ''
            recruit['state'] = ''
            print(
                emoji.emojize(
                    f':thumbsdown: Error parsing high school and hometown "{meta_span_text}" for {recruit["full_name"]}',
                    use_aliases=True))
        ranking_div = list_item.find('div', class_='rankings-page__star-and-score')
        recruit['score'] = ranking_div.find('span', class_='score').text
        star_list = ranking_div.find_all('span', class_='icon-starsolid yellow')
        recruit['stars'] = len(star_list)
        # Pre-initialize so the except handler below can't hit NameError
        # when find() returns None and .text raises before assignment.
        height_and_weight_text = ''
        try:
            # Metrics text looks like '6-2 / 195' (feet-inches / pounds).
            height_and_weight_text = list_item.find('div', class_='metrics').text.strip()
            height_and_weight_split = height_and_weight_text.split(' / ')
            height_split = height_and_weight_split[0].split('-')
            recruit['height_feet'] = float(height_split[0])
            recruit['height_inches'] = float(height_split[1])
            recruit['weight'] = float(height_and_weight_split[1])
        except (AttributeError, IndexError, ValueError):
            recruit['height_feet'] = 0.0
            recruit['height_inches'] = 0.0
            recruit['weight'] = 0.0
            print(
                emoji.emojize(
                    f':thumbsdown: Error parsing height, feet, and weight "{height_and_weight_text}" for {recruit["full_name"]}',
                    use_aliases=True))
        recruit_list.append(recruit)