def __extract_matches_list_of_series(self):
    """Scrape the series page and collect every match accepted by validation.

    Fetches the page at ``self.__series_link``, reads the series format text
    from the navigation header, then walks each match row. Rows that
    ``__validate_match`` accepts are wrapped in a ``Match`` and appended to
    ``self.__matches_list``; every other row is skipped.
    """
    page = Common.get_soup_object(self.__series_link)

    # Text of the first <div> inside the nav header, cut at the first ".".
    nav_header = page.find(
        'div', class_='cb-col-100 cb-col cb-nav-main cb-bg-white')
    series_formats = nav_header.find('div').text.split(".")[0]

    for row in page.find_all('div', class_='cb-col-60 cb-col cb-srs-mtchs-tm'):
        title_tag = row.find('a', class_='text-hvr-underline')
        venue_tag = row.find('div')
        outcome_text, outcome = self.__extract_match_outcome(
            row.find('a', class_='cb-text-link'))

        is_valid_match, match_id, match_link, match_format = \
            self.__validate_match(title_tag, venue_tag, outcome, series_formats)
        if not is_valid_match:
            continue

        winner, margin = Common.get_match_winning_team_and_margin(
            outcome, outcome_text)
        self.__matches_list.append(
            Match(match_id, title_tag.text, match_format, venue_tag.text,
                  outcome, Common.home_page + match_link, winner, margin))
def __extract_series_list_in_calender_year(self):
    """Collect the valid series listed in the scorecard archive of ``self.year``.

    Every underlined anchor on the archive page is inspected. Anchors whose
    href contains "cricket-series" and passes ``Common.is_series_valid`` are
    turned into ``Series`` objects and appended to ``self.series_list``.
    """
    archive_url = Common.home_page + "/cricket-scorecard-archives/" + str(self.year)
    archive_page = Common.get_soup_object(archive_url)

    for anchor in archive_page.find_all('a', class_='text-hvr-underline'):
        href = anchor.get('href')
        if "cricket-series" not in href:
            continue
        if not Common.is_series_valid(href):
            continue

        # The series id is the third "/"-separated piece of the relative link.
        series_id = href.split("/")[2]
        # The complete anchor text is used as the title (it is not cut at ",").
        self.series_list.append(
            Series(series_id, anchor.text, self.year, Common.home_page + href))
def __extract_match_data(self, category):
    """Populate this match from its scorecard page.

    The live-scores link is rewritten to the scorecard link and fetched.
    A missing series is resolved from the page first; the match format is
    then derived from the title and the series format. Processing stops
    early when no format is found or when ``__is_valid()`` does not return
    ``True``. Otherwise teams, short names and the start time are filled in
    and ``self.is_valid`` is set to ``True``.

    :param category: forwarded to ``__extract_series_object`` when the
        series has to be resolved from the page.
    """
    scorecard_url = self.link.replace(
        "live-cricket-scores", "live-cricket-scorecard")
    page = Common.get_soup_object(scorecard_url)

    if self.series is None:
        self.series = self.__extract_series_object(page, category)

    self.format = Common.get_match_format(self.title, self.series.format)
    if self.format is None:
        return

    self.__extract_match_info(page)
    if self.__is_valid() is not True:
        return

    self.__extract_teams(page)
    self.__extract_teams_short_names()
    self.time = self.__get_match_time()
    self.is_valid = True
def __init__(self, match_id, title, format, venue, result, match_link, winning_team, margin):
    """Create a match record from the values scraped off a series page.

    :param match_id: identifier of the match.
    :param title: match title; the part before the first "," is split on
        " vs " to obtain the playing teams.
    :param format: match format value, stored as given.
    :param venue: venue text.
    :param result: match outcome; a winner is only resolved when this
        equals 'WIN'.
    :param match_link: link of the match, stored as given.
    :param winning_team: winning team name as it appears in the result text.
    :param margin: win margin, stored as given.
    """
    self.__id = match_id
    self.__title = title
    self.__format = format
    self.__venue = venue
    self.__result = result
    self.__win_margin = margin
    self.__match_link = match_link
    self.__date = 0  # epoch time

    # Maps each team name to its short name; both start out as the full
    # name: {'team_1_name': 'team_1_short_name', 'team_2_name': 'team_2_short_name'}
    playing_teams = title.split(",")[0].split(" vs ")
    first_team, second_team = playing_teams[0], playing_teams[1]
    self.__playing_teams = {first_team: first_team, second_team: second_team}

    # The winner named in the result text may not match a title name
    # exactly, so the closest title name is used instead. Examples:
    #   "India Women Red vs India Women Blue" -> "India Red Won by 7 Wickets"
    #   https://www.cricbuzz.com/cricket-scores/20732
    #   "India Women Blue vs India Women Green" -> "India Green Won by 7 Wickets"
    #   https://www.cricbuzz.com/cricket-scores/20733
    self.__winning_team = (
        Common.get_close_match(winning_team, playing_teams)
        if result == 'WIN' else None
    )

    self.__match_info = {}
    self.__match_squad = {}
    self.__innings_scores = []
    self.__per_innings_head_to_head_data = []
    self.__logger = logging.getLogger(__name__)
def insert(self, bowler_id, match_id, innings_num, wickets_taken, overs_bowled, runs_given, economy, team_id):
    """Insert one row of bowling figures into ``bowling_stats``.

    ``overs_bowled`` is passed through ``Common.convert_overs_to_balls`` and
    the converted value is what gets stored. All other arguments are bound
    unchanged, in the positional order shown in the row tuple below.
    """
    row = (
        bowler_id,
        match_id,
        innings_num,
        wickets_taken,
        Common.convert_overs_to_balls(overs_bowled),
        runs_given,
        economy,
        team_id,
    )
    # Values are bound by the driver through placeholders, never formatted
    # into the statement text.
    self.cursor.execute(
        """INSERT INTO bowling_stats VALUES (%s, %s, %s, %s, %s, %s, %s, %s)""",
        row)
def __init__(self, title, venue, link, series_object, category):
    """Create a scheduled match and immediately load its data.

    :param title: match title; the part before the first "," is split on
        " vs " to obtain the two team names.
    :param venue: venue text.
    :param link: match link; the match id is derived from it.
    :param series_object: series the match belongs to, or ``None``.
    :param category: forwarded to ``__extract_match_data``.
    """
    self.id = Common.get_id_from_link(link)
    self.title = title
    self.venue = venue
    self.link = link
    self.series = series_object
    self.format = None
    self.time = None
    self.is_valid = False
    self.match_info = {}

    # teams maps every team name to its details:
    # {'team-1': {'short_name': ..., 'squad': [...]}, 'team-2': {...}}
    # The short name starts out as the full name and the squad starts empty.
    playing_teams = title.split(",")[0].split(" vs ")
    self.teams = {
        team_name: {'short_name': team_name, 'squad': []}
        for team_name in (playing_teams[0], playing_teams[1])
    }

    self.__extract_match_data(category)
def __extract_team_squad(self, squad_block):
    """Return a list of ``Player`` objects for the player links in ``squad_block``.

    The player id is the third "/"-separated piece of each link's href and
    the name is the link text after ``Common.correct_player_name``.
    """
    players = []
    for anchor in squad_block.find_all(
            'a', class_='margin0 text-black text-hvr-underline'):
        player_id = anchor.get('href').split("/")[2]
        players.append(
            Player(Common.correct_player_name(anchor.text), player_id))
    return players
def __extract_schedule(self):
    """Load the matches scheduled on ``self.date`` from the upcoming-series page.

    Only category blocks whose leading text equals ``self.date`` are
    processed. For each series block inside them, every match block is
    turned into a ``Match``. Valid matches are added to their series, and a
    series seen for the first time is taken from the match and cached in
    ``self.series_data`` under the series title.
    """
    page = Common.get_soup_object(
        "https://www.cricbuzz.com/cricket-schedule/upcoming-series/")
    day_blocks = page.find_all('div', {
        'class': 'cb-col-100 cb-col',
        'ng-show': True
    })

    for day_block in day_blocks:
        if day_block.next_element.text != self.date:
            continue

        category_type = Common.get_category_type(day_block.get('ng-show'))

        for series_block in day_block.find_all(
                'div', class_='cb-col-100 cb-col'):
            series_title = series_block.next_element.text
            series_object = (
                self.series_data[series_title]
                if series_title in self.series_data else None
            )

            match_blocks = series_block.find_all(
                'div', 'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu ')
            if not match_blocks:
                # Control comes here in case of multiple matches being
                # played in a single day of a series.
                match_blocks = series_block.find_all(
                    'div',
                    'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu cb-adjst-lst')

            for match_block in match_blocks:
                title_anchor = match_block.find('a', href=True)
                match_title = title_anchor.text
                match_link = Common.home_page + title_anchor.get('href')
                match_venue = match_block.find('div').text

                match_object = Match(match_title, match_venue, match_link,
                                     series_object, category_type)
                if not match_object.is_valid:
                    continue

                if series_object is None:
                    # First valid match of an unseen series: take the series
                    # from the match and remember it for later matches.
                    series_object = match_object.get_series_object()
                    self.series_data[series_title] = series_object
                series_object.add_match(match_object)
def __init__(self, link, match_squad_ref):
    """Load the commentary found at ``link`` and index the match squad.

    :param link: page whose commentary paragraphs are read.
    :param match_squad_ref: mapping of team name to an iterable of players;
        it is flattened into a player -> team lookup.
    """
    self.__short_name_to_full_name_map = {}
    self.__per_innings_head_to_head_object_cache = [{}, {}, {}, {}]

    # {"player_name": "team_name", ...}
    self.__local_squad = {
        player: team
        for team in match_squad_ref
        for player in match_squad_ref[team]
    }

    page = Common.get_soup_object(link)
    commentary_tags = page.find_all('p', class_='cb-col cb-col-90 cb-com-ln')
    # Paragraphs are stored in reverse page order, each split on ",".
    # Presumably the page lists the newest ball first -- TODO confirm.
    self.__full_match_commentary = [
        tag.text.split(',') for tag in reversed(commentary_tags)
    ]
def __extract_innings_batting_scores(self, innings_batting_block):
    """Return a list of ``BatsmanScore`` objects for one innings' batting card.

    Rows without a player-name cell are skipped. All figures are kept as
    the stripped text of their cells.
    """
    scores = []
    for row in innings_batting_block.find_all(
            'div', class_='cb-col cb-col-100 cb-scrd-itms'):
        name_cell = row.find('div', class_='cb-col cb-col-27 ')
        if name_cell is None:
            continue

        player_name = Common.correct_player_name(name_cell.text)
        runs_scored = row.find(
            'div', class_='cb-col cb-col-8 text-right text-bold').text.strip()

        # Remaining cells, in order: balls, fours, sixes, strike rate.
        # Only the first three are used.
        stat_cells = row.find_all('div', class_='cb-col cb-col-8 text-right')
        balls_played, num_fours, num_sixes = (
            stat_cells[position].text.strip() for position in range(3)
        )

        scores.append(BatsmanScore(player_name, runs_scored, balls_played,
                                   num_fours, num_sixes))
    return scores
def __extract_match_info_squad_and_scores(self, series_squad_ref):
    """Fetch the scorecard page and extract match info, squads and innings scores.

    :param series_squad_ref: series-wide squad mapping, forwarded to
        ``__extract_match_squad``.

    One score object per innings block (every ``div`` carrying an ``id``)
    is appended to ``self.__innings_scores``.
    """
    scorecard_url = (Common.home_page + "/api/html/cricket-scorecard/"
                     + str(self.__id))
    page = Common.get_soup_object(scorecard_url)

    self.__extract_match_info(page)
    self.__extract_match_squad(page, series_squad_ref)

    for innings_num, innings_block in enumerate(page.find_all('div', id=True)):
        # The first header block is treated as batting, the second as bowling.
        sections = innings_block.find_all(
            'div', class_='cb-col cb-col-100 cb-ltst-wgt-hdr')
        batting_block, bowling_block = sections[0], sections[1]

        innings_score = self.__extract_innings_total_score(
            batting_block, innings_num, self.__playing_teams)
        innings_score.set_batting_scores(
            self.__extract_innings_batting_scores(batting_block))
        innings_score.set_bowling_scores(
            self.__extract_innings_bowling_scores(bowling_block))
        self.__innings_scores.append(innings_score)
def __extract_player_profile(self):
    """Read role, batting style and bowling style from the player's profile page.

    Each of the three fields falls back to '--' when the page has no entry
    with that exact label.
    """
    profile = {'Role': '--', 'Batting Style': '--', 'Bowling Style': '--'}

    page = Common.get_soup_object(
        "http://www.cricbuzz.com/profiles/" + str(self.__id))
    label_tags = page.find_all(
        'div', class_="cb-col cb-col-40 text-bold cb-lst-itm-sm")
    value_tags = page.find_all('div', "cb-col cb-col-60 cb-lst-itm-sm")

    # Labels and values are paired by position; other labels are ignored.
    for label_tag, value_tag in zip(label_tags, value_tags):
        label = label_tag.text.strip()
        if label in profile:
            profile[label] = value_tag.text.strip()

    self.__role = profile['Role']
    self.__batting_style = profile['Batting Style']
    self.__bowling_style = profile['Bowling Style']
def __validate_match(self, match_title_block, match_venue_block, match_outcome, series_formats):
    """Decide whether a scraped match row should be processed.

    :returns: ``[is_valid, match_id, match_link, match_format]``. The last
        three are ``None`` when the row is rejected before a format is
        found. A match already present in the DB is reported as not valid
        but still carries its id, link and format.
    """
    rejected = [False, None, None, None]

    if match_title_block is None:
        return rejected
    if "cricket-scores" not in match_title_block.get('href'):
        return rejected
    if match_venue_block is None or match_outcome is None:
        return rejected

    match_format = Common.get_match_format(match_title_block.text,
                                           series_formats)
    if match_format is None:
        return rejected

    match_link = match_title_block.get('href')
    # The match id is the third "/"-separated piece of the relative link.
    match_id = match_link.split("/")[2]

    already_stored = self.__db_match_table.check_match_id(match_id)
    if already_stored:
        self.__logger.info("Skipping {}. Available in DB".format(match_id))
    return [not already_stored, match_id, match_link, match_format]
def __extract_innings_bowling_scores(self, innings_bowling_block):
    """Return a list of ``BowlerScore`` objects for one innings' bowling card.

    Rows without a player-name cell are skipped, and so are rows whose
    economy text is empty. All figures are kept as the stripped text of
    their cells.
    """
    figures = []
    for row in innings_bowling_block.find_all(
            'div', class_='cb-col cb-col-100 cb-scrd-itms '):
        name_cell = row.find('div', class_='cb-col cb-col-40')
        if name_cell is None:
            continue

        player_name = Common.correct_player_name(name_cell.text)
        wickets_taken = row.find(
            'div', class_='cb-col cb-col-8 text-right text-bold').text.strip()

        # Runs given and economy.
        wide_cells = row.find_all('div', class_='cb-col cb-col-10 text-right')
        runs_given = wide_cells[0].text.strip()
        economy = wide_cells[1].text.strip()

        # Overs bowled, maiden overs, no balls, wide balls; only overs is used.
        narrow_cells = row.find_all('div', class_='cb-col cb-col-8 text-right')
        overs_bowled = narrow_cells[0].text.strip()

        if economy:
            # Rows with an empty economy are dropped. Reason: Wasim Jaffer,
            # https://www.cricbuzz.com/live-cricket-scorecard/19085/vidarbha-vs-chhattisgarh-group-d-ranji-trophy-2017-18
            figures.append(BowlerScore(player_name, overs_bowled,
                                       wickets_taken, runs_given, economy))
    return figures
def __extract_match_squad(self, soup, series_squad_ref):
    """Fill ``self.__match_squad`` with the players of each team in this match.

    :param soup: parsed scorecard page.
    :param series_squad_ref: series-wide mapping of team title to
        {player name: ``Player``}. Teams and players not seen before are
        added to it, so one ``Player`` object is shared across matches.
    :raises Exception: when a row with player links appears before any
        team header row.
    """
    squad_tags = soup.find_all('div', {"class" : ["cb-col cb-col-100 cb-minfo-tm-nm", "cb-col cb-col-100 cb-minfo-tm-nm cb-minfo-tm2-nm"]})
    team_title = ""

    for squad_tag in squad_tags:
        player_links = squad_tag.find_all(
            'a', class_='margin0 text-black text-hvr-underline')

        if not player_links:
            # A row without player links is a team header; it decides which
            # team the following player rows belong to.
            team_title = squad_tag.text
            if "Squad" in team_title:
                team_title = team_title.split("Squad")[0].strip()
            self.__match_squad[team_title] = {}
            series_squad_ref.setdefault(team_title, {})
            continue

        if not team_title:
            raise Exception("match_link : {}".format(self.__match_link))

        for player_link in player_links:
            player_id = player_link.get('href').split("/")[2]
            player_name = Common.correct_player_name(player_link.text)
            if player_name not in series_squad_ref[team_title]:
                series_squad_ref[team_title][player_name] = Player(
                    player_name, player_id)
            self.__match_squad[team_title][player_name] = \
                series_squad_ref[team_title][player_name]
def __get_player_full_name_from_short_name(self, name):
    """Return the squad name that ``Common.get_close_match`` picks for ``name``.

    Results are remembered in ``self.__short_name_to_full_name_map`` so each
    distinct ``name`` is resolved only once.
    """
    resolved = self.__short_name_to_full_name_map
    if name in resolved:
        return resolved[name]

    resolved[name] = Common.get_close_match(name, self.__local_squad.keys())
    return resolved[name]
def __extract_match_outcome(self, match_outcome_block):
    """Return ``[outcome_text, outcome]`` for a match-result tag.

    :param match_outcome_block: tag holding the result text, or ``None``
        when the match row has no result link.
    :returns: a two-item list with the tag's text and the value that
        ``Common.get_match_outcome`` gives for it. Both items are ``None``
        when no tag was supplied.
    """
    if match_outcome_block is None:
        # Reading ``.text`` off a missing tag used to raise AttributeError
        # here. Callers already treat a ``None`` outcome as "no result",
        # so report that instead of crashing.
        return [None, None]

    outcome_text = match_outcome_block.text
    return [outcome_text, Common.get_match_outcome(outcome_text)]
def get_schedule(self):
    """Return the list of series from a ``Schedule`` built for ``Common.get_date_now()``."""
    return Schedule(Common.get_date_now()).get_list_of_series()
def __get_match_time(self):
    """Return the match start time as given by ``Common.get_epoch_time_from_gmt``.

    The input is "<date> <time>" built from ``self.match_info``. Only the
    part of the 'Date' entry before the first " - " is used; the 'Time'
    entry is used as is.
    """
    match_date = self.match_info['Date'].split(" - ")[0].strip()
    start_time = self.match_info['Time']
    return Common.get_epoch_time_from_gmt(match_date + ' ' + start_time)