def __extract_matches_list_of_series(self):
    """Collect the valid matches listed on the series page.

    Loads the page at ``self.__series_link`` through
    ``Common.get_soup_object``, reads the series format text from the page
    header, and walks every match entry found on the page.  Each entry
    that ``self.__validate_match`` accepts is wrapped in a ``Match`` and
    appended to ``self.__matches_list``; rejected entries are skipped.
    """
    page = Common.get_soup_object(self.__series_link)

    # Only the text before the first "." in the header's first <div> is
    # kept as the series format description.
    header = page.find(
        'div', class_='cb-col-100 cb-col cb-nav-main cb-bg-white')
    series_formats = header.find('div').text.split(".")[0]

    entries = page.find_all('div', class_='cb-col-60 cb-col cb-srs-mtchs-tm')
    for entry in entries:
        title_tag = entry.find('a', class_='text-hvr-underline')
        venue_tag = entry.find('div')
        outcome_tag = entry.find('a', class_='cb-text-link')

        outcome_text, outcome = self.__extract_match_outcome(outcome_tag)
        is_valid, match_id, match_link, match_format = self.__validate_match(
            title_tag, venue_tag, outcome, series_formats)
        if not is_valid:
            continue

        winner, margin = Common.get_match_winning_team_and_margin(
            outcome, outcome_text)
        self.__matches_list.append(
            Match(match_id,
                  title_tag.text,
                  match_format,
                  venue_tag.text,
                  outcome,
                  Common.home_page + match_link,
                  winner,
                  margin))
def __extract_series_list_in_calender_year(self):
    """Populate ``self.series_list`` with the series archived for ``self.year``.

    Loads ``Common.home_page + "/cricket-scorecard-archives/" + year``,
    scans every ``<a class="text-hvr-underline">`` on the page and, for
    each link that contains ``"cricket-series"`` and passes
    ``Common.is_series_valid``, appends a ``Series`` built from the id
    (third ``/``-separated component of the href), the anchor text, the
    year and the absolute link.

    Anchors that carry no ``href`` attribute are skipped instead of
    raising ``TypeError`` on the substring test.
    """
    archive_link = (Common.home_page + "/cricket-scorecard-archives/"
                    + str(self.year))
    soup = Common.get_soup_object(archive_link)

    for series_block in soup.find_all('a', class_='text-hvr-underline'):
        series_link = series_block.get('href')
        # The search filters on class only, so an anchor may lack an href;
        # ``get`` then yields None, which cannot be substring-tested.
        if not series_link:
            continue
        if "cricket-series" not in series_link:
            continue
        if not Common.is_series_valid(series_link):
            continue

        # For an href shaped like "/cricket-series/<id>/<slug>" the id is
        # the third component after splitting on "/".
        series_id = series_link.split("/")[2]
        series_title = series_block.text
        series_object = Series(series_id, series_title, self.year,
                               Common.home_page + series_link)
        self.series_list.append(series_object)
def __extract_match_data(self, category):
    """Fill in this match's series, format, teams and time from its scorecard.

    The scorecard page is the match link with ``"live-cricket-scores"``
    replaced by ``"live-cricket-scorecard"``.  When no series is attached
    yet, one is extracted from the page for the given ``category``.  The
    remaining details are only extracted when a format could be determined
    and ``self.__is_valid()`` returns ``True``; in that case
    ``self.is_valid`` is set to ``True`` at the end.

    :param category: passed through to ``self.__extract_series_object``.
    """
    scorecard_link = self.link.replace("live-cricket-scores",
                                       "live-cricket-scorecard")
    soup = Common.get_soup_object(scorecard_link)

    if self.series is None:
        self.series = self.__extract_series_object(soup, category)

    self.format = Common.get_match_format(self.title, self.series.format)
    if self.format is None:
        # Without a recognised format nothing else is extracted.
        return

    self.__extract_match_info(soup)
    if self.__is_valid() is not True:
        return

    self.__extract_teams(soup)
    self.__extract_teams_short_names()
    self.time = self.__get_match_time()
    self.is_valid = True
def __init__(self, link, match_squad_ref):
    """Set up the commentary state and load the commentary found at ``link``.

    :param link: page passed to ``Common.get_soup_object``; every
        ``<p class="cb-col cb-col-90 cb-com-ln">`` on it is read.
    :param match_squad_ref: mapping of team -> iterable of players; it is
        inverted into a player -> team lookup.

    The commentary paragraphs are visited in reverse page order, and each
    one is stored as the list produced by splitting its text on ``","``.
    """
    self.__short_name_to_full_name_map = {}
    self.__full_match_commentary = []
    # Four independent caches (presumably one per innings -- confirm).
    self.__per_innings_head_to_head_object_cache = [{}, {}, {}, {}]

    # Shape: {"player_name": "team_name", ...}.  A player listed under
    # several teams ends up mapped to the last team iterated.
    self.__local_squad = {
        player: team
        for team in match_squad_ref
        for player in match_squad_ref[team]
    }

    soup = Common.get_soup_object(link)
    paragraphs = soup.find_all('p', class_='cb-col cb-col-90 cb-com-ln')
    self.__full_match_commentary.extend(
        paragraph.text.split(',') for paragraph in reversed(paragraphs))
def __extract_match_info_squad_and_scores(self, series_squad_ref):
    """Extract match info, squad and per-innings scores from the scorecard.

    Loads ``Common.home_page + "/api/html/cricket-scorecard/" + id`` and
    then, in order: extracts the match info, extracts the squad (using
    ``series_squad_ref``), and appends one score object per innings to
    ``self.__innings_scores``.

    :param series_squad_ref: forwarded unchanged to
        ``self.__extract_match_squad``.
    :raises IndexError: if an innings container holds fewer than two
        ``cb-ltst-wgt-hdr`` blocks.
    """
    scorecard_url = (Common.home_page + "/api/html/cricket-scorecard/"
                     + str(self.__id))
    soup = Common.get_soup_object(scorecard_url)

    self.__extract_match_info(soup)
    self.__extract_match_squad(soup, series_squad_ref)

    # Every <div> that carries an id attribute is treated as one innings.
    innings_containers = soup.find_all('div', id=True)
    for innings_num, container in enumerate(innings_containers):
        blocks = container.find_all(
            'div', class_='cb-col cb-col-100 cb-ltst-wgt-hdr')
        # First block is used for batting, second for bowling.
        batting_block, bowling_block = blocks[0], blocks[1]

        innings_score = self.__extract_innings_total_score(
            batting_block, innings_num, self.__playing_teams)
        batting_scores = self.__extract_innings_batting_scores(batting_block)
        innings_score.set_batting_scores(batting_scores)
        bowling_scores = self.__extract_innings_bowling_scores(bowling_block)
        innings_score.set_bowling_scores(bowling_scores)

        self.__innings_scores.append(innings_score)
def __extract_player_profile(self):
    """Read role, batting style and bowling style from the player's profile.

    Loads ``"http://www.cricbuzz.com/profiles/" + id`` and pairs the label
    cells with the value cells in page order.  Only the labels ``Role``,
    ``Batting Style`` and ``Bowling Style`` are used; any of them missing
    from the page keeps the placeholder ``'--'``.  The results are stored
    on ``self.__role``, ``self.__batting_style`` and
    ``self.__bowling_style``.
    """
    profile = {
        'Role': '--',
        'Batting Style': '--',
        'Bowling Style': '--',
    }

    soup = Common.get_soup_object(
        "http://www.cricbuzz.com/profiles/" + str(self.__id))
    label_tags = soup.find_all(
        'div', class_="cb-col cb-col-40 text-bold cb-lst-itm-sm")
    value_tags = soup.find_all('div', "cb-col cb-col-60 cb-lst-itm-sm")

    # zip stops at the shorter list, so an unmatched trailing cell is ignored.
    for label_tag, value_tag in zip(label_tags, value_tags):
        label = label_tag.text.strip()
        if label in profile:
            profile[label] = value_tag.text.strip()

    self.__role = profile['Role']
    self.__batting_style = profile['Batting Style']
    self.__bowling_style = profile['Bowling Style']
def __extract_schedule(self):
    """Build the schedule for ``self.date`` from the upcoming-series page.

    Each category block whose leading element's text equals ``self.date``
    is scanned for series blocks, and each series block for match blocks.
    A ``Match`` is created per match block; valid matches are added to
    their series object.  Series objects are looked up in, and newly
    discovered ones stored into, ``self.series_data`` keyed by the series
    title.
    """
    soup = Common.get_soup_object(
        "https://www.cricbuzz.com/cricket-schedule/upcoming-series/")
    category_filter = {'class': 'cb-col-100 cb-col', 'ng-show': True}

    for category_block in soup.find_all('div', category_filter):
        if category_block.next_element.text != self.date:
            continue

        category_type = Common.get_category_type(
            category_block.get('ng-show'))

        for series_block in category_block.find_all(
                'div', class_='cb-col-100 cb-col'):
            series_title = series_block.next_element.text
            series_object = (self.series_data[series_title]
                             if series_title in self.series_data else None)

            # The trailing space in this class string is part of the lookup.
            match_blocks = series_block.find_all(
                'div', 'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu ')
            if not match_blocks:
                # Control comes here in case of multiple matches being
                # played in a single day of a series.
                match_blocks = series_block.find_all(
                    'div',
                    'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu cb-adjst-lst')

            for match_block in match_blocks:
                title_anchor = match_block.find('a', href=True)
                match_title = title_anchor.text
                match_link = Common.home_page + title_anchor.get('href')
                match_venue = match_block.find('div').text

                match_object = Match(match_title, match_venue, match_link,
                                     series_object, category_type)
                if not match_object.is_valid:
                    continue

                if series_object is None:
                    # First valid match of an unseen series: take the
                    # series from the match and remember it by title.
                    series_object = match_object.get_series_object()
                    self.series_data[series_title] = series_object
                series_object.add_match(match_object)