Python Common.get_soup_objectの例

プログラミング言語: Python

名前空間/パッケージ名: scraper.common_util

クラス/型: Common

メソッド/関数: get_soup_object

hotexamples.comのコード掲載数: 7

Python Common.get_soup_object - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのscraper.common_util.Common.get_soup_objectの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

get_soup_object(7)

correct_player_name(4)

get_close_match(2)

get_match_format(2)

convert_overs_to_balls(1)

get_category_type(1)

get_date_now(1)

get_epoch_time_from_gmt(1)

get_id_from_link(1)

get_match_outcome(1)

get_match_winning_team_and_margin(1)

is_series_valid(1)

コード例 #1

ファイルを表示

 def __extract_matches_list_of_series(self):
     """Collect the valid matches listed on the series page.

     Fetches ``self.__series_link`` through ``Common.get_soup_object`` and
     reads the series format text (everything before the first "." in the
     first ``div`` nested inside the
     ``cb-col-100 cb-col cb-nav-main cb-bg-white`` element).  Every
     ``cb-col-60 cb-col cb-srs-mtchs-tm`` entry is then passed through
     ``self.__validate_match``; accepted entries are wrapped in ``Match``
     objects and appended to ``self.__matches_list``.
     """
     page = Common.get_soup_object(self.__series_link)
     header = page.find(
         'div', class_='cb-col-100 cb-col cb-nav-main cb-bg-white')
     series_formats = header.find('div').text.split(".")[0]
     entries = page.find_all(
         'div', class_='cb-col-60 cb-col cb-srs-mtchs-tm')
     for entry in entries:
         title_tag = entry.find('a', class_='text-hvr-underline')
         venue_tag = entry.find('div')
         outcome_text, outcome = self.__extract_match_outcome(
             entry.find('a', class_='cb-text-link'))
         is_valid_match, match_id, match_link, match_format = \
             self.__validate_match(title_tag, venue_tag, outcome,
                                   series_formats)
         if not is_valid_match:
             # Rejected entries contribute nothing to the list.
             continue
         winning_team, win_margin = Common.get_match_winning_team_and_margin(
             outcome, outcome_text)
         self.__matches_list.append(
             Match(match_id, title_tag.text, match_format, venue_tag.text,
                   outcome, Common.home_page + match_link,
                   winning_team, win_margin))

コード例 #2

ファイルを表示

 def __extract_series_list_in_calender_year(self):
     """Populate ``self.series_list`` with the series archived for ``self.year``.

     Fetches the ``/cricket-scorecard-archives/<year>`` page under
     ``Common.home_page`` and inspects every ``a`` tag with class
     ``text-hvr-underline``.  A link is kept when it contains
     "cricket-series" and ``Common.is_series_valid`` accepts it; a
     ``Series`` object is then built from the third "/"-separated segment
     of the link (used as the series id), the anchor text, the year and
     the absolute link.
     """
     archive_link = Common.home_page + "/cricket-scorecard-archives/" + str(self.year)
     soup = Common.get_soup_object(archive_link)
     for series_block in soup.find_all('a', class_='text-hvr-underline'):
         series_link = series_block.get('href')
         # ``Tag.get`` returns None when the anchor has no href attribute.
         # Such an anchor cannot be a series link, so skip it rather than
         # let the substring test below raise TypeError.
         if not series_link:
             continue
         if "cricket-series" not in series_link:
             continue
         if not Common.is_series_valid(series_link):
             continue
         series_id = series_link.split("/")[2]
         series_title = series_block.text
         series_object = Series(series_id, series_title, self.year,
                                Common.home_page + series_link)
         self.series_list.append(series_object)

コード例 #3

ファイルを表示

 def __extract_match_data(self, category):
     """Load the scorecard page and fill in this match's details.

     The scorecard URL is derived from ``self.link`` by replacing
     "live-cricket-scores" with "live-cricket-scorecard".  When
     ``self.series`` is not set yet it is extracted from the page using
     ``category``.  Processing stops early if no match format can be
     determined or if ``self.__is_valid()`` does not return ``True``;
     otherwise teams, short names and time are extracted and
     ``self.is_valid`` is set to ``True``.
     """
     scorecard_link = self.link.replace("live-cricket-scores",
                                        "live-cricket-scorecard")
     page = Common.get_soup_object(scorecard_link)
     if self.series is None:
         self.series = self.__extract_series_object(page, category)

     self.format = Common.get_match_format(self.title, self.series.format)
     if self.format is None:
         return

     self.__extract_match_info(page)
     # Identity check kept on purpose: only a literal True continues.
     if self.__is_valid() is not True:
         return

     self.__extract_teams(page)
     self.__extract_teams_short_names()
     self.time = self.__get_match_time()
     self.is_valid = True
コード例 #4

ファイルを表示

    def __init__(self, link, match_squad_ref):
        """Set up the caches and load the full commentary from ``link``.

        ``match_squad_ref`` is iterated as a mapping from team to an
        iterable of players; it is inverted into a player -> team lookup.
        The page at ``link`` is fetched with ``Common.get_soup_object`` and
        each ``p`` tag with class ``cb-col cb-col-90 cb-com-ln`` is split on
        commas and stored, walking the tags in reverse document order.
        """
        self.__short_name_to_full_name_map = {}
        self.__full_match_commentary = []
        # One cache dict per slot; four slots are created up front.
        self.__per_innings_head_to_head_object_cache = [{} for _ in range(4)]

        # Shape: {"player_name": "team_name", ...}
        self.__local_squad = {
            player: team
            for team in match_squad_ref
            for player in match_squad_ref[team]
        }

        page = Common.get_soup_object(link)
        commentary_tags = page.find_all('p', class_='cb-col cb-col-90 cb-com-ln')
        self.__full_match_commentary.extend(
            tag.text.split(',') for tag in reversed(commentary_tags))

コード例 #5

ファイルを表示

 def __extract_match_info_squad_and_scores(self, series_squad_ref):
     """Extract match info, squad and per-innings scores from the scorecard.

     The scorecard is fetched from ``/api/html/cricket-scorecard/<id>``
     under ``Common.home_page``.  After the match info and squad are
     extracted, every ``div`` carrying an ``id`` attribute is handled as
     one innings: its first two ``cb-col cb-col-100 cb-ltst-wgt-hdr``
     blocks are used as the batting and bowling blocks respectively, and
     the resulting score object is appended to ``self.__innings_scores``.
     """
     scorecard_link = Common.home_page + "/api/html/cricket-scorecard/" + str(self.__id)
     page = Common.get_soup_object(scorecard_link)

     self.__extract_match_info(page)
     self.__extract_match_squad(page, series_squad_ref)

     for innings_num, innings in enumerate(page.find_all('div', id=True)):
         blocks = innings.find_all('div', class_='cb-col cb-col-100 cb-ltst-wgt-hdr')
         batting_block = blocks[0]
         bowling_block = blocks[1]
         innings_score = self.__extract_innings_total_score(
             batting_block, innings_num, self.__playing_teams)
         batting_scores = self.__extract_innings_batting_scores(batting_block)
         innings_score.set_batting_scores(batting_scores)
         bowling_scores = self.__extract_innings_bowling_scores(bowling_block)
         innings_score.set_bowling_scores(bowling_scores)
         self.__innings_scores.append(innings_score)

コード例 #6

ファイルを表示

ファイル: player.py プロジェクト: mathsdada/mathsdada

 def __extract_player_profile(self):
     """Read role, batting style and bowling style from the profile page.

     The profile page for ``self.__id`` is fetched and its key/value
     ``div`` tags are paired up in document order.  Only the labels
     'Role', 'Batting Style' and 'Bowling Style' are recognised; any of
     them missing from the page keeps the placeholder '--'.
     """
     profile = {
         'Role': '--',
         'Batting Style': '--',
         'Bowling Style': '--'
     }
     page = Common.get_soup_object(
         "http://www.cricbuzz.com/profiles/" + str(self.__id))
     label_tags = page.find_all(
         'div', class_="cb-col cb-col-40 text-bold cb-lst-itm-sm")
     value_tags = page.find_all('div', "cb-col cb-col-60 cb-lst-itm-sm")
     for label_tag, value_tag in zip(label_tags, value_tags):
         label = label_tag.text.strip()
         # Unrecognised labels are ignored so the dict never gains keys.
         if label in profile:
             profile[label] = value_tag.text.strip()
     self.__role = profile['Role']
     self.__batting_style = profile['Batting Style']
     self.__bowling_style = profile['Bowling Style']

コード例 #7

ファイルを表示

    def __extract_schedule(self):
        """Register the scheduled matches whose category block matches ``self.date``.

        Fetches the upcoming-series schedule page and looks at every
        ``div`` with class ``cb-col-100 cb-col`` that carries an
        ``ng-show`` attribute.  Blocks whose leading element text equals
        ``self.date`` are processed: for each series block inside, a
        ``Match`` is built per match entry, and valid matches are added to
        the series object.  That object is looked up in (or, on first use,
        stored into) ``self.series_data`` under the series title.
        """
        page = Common.get_soup_object(
            "https://www.cricbuzz.com/cricket-schedule/upcoming-series/")
        category_filter = {'class': 'cb-col-100 cb-col', 'ng-show': True}
        for category_block in page.find_all('div', category_filter):
            if category_block.next_element.text != self.date:
                continue
            category_type = Common.get_category_type(
                category_block.get('ng-show'))
            for series_block in category_block.find_all(
                    'div', class_='cb-col-100 cb-col'):
                series_title = series_block.next_element.text
                series_object = (self.series_data[series_title]
                                 if series_title in self.series_data
                                 else None)
                match_blocks = series_block.find_all(
                    'div', 'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu ')
                if not match_blocks:
                    # Control comes here in case of multiple matches being
                    # played in single day of a series
                    match_blocks = series_block.find_all(
                        'div',
                        'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu cb-adjst-lst')
                for match_block in match_blocks:
                    title_tag = match_block.find('a', href=True)
                    match_title = title_tag.text
                    match_link = Common.home_page + title_tag.get('href')
                    match_venue = match_block.find('div').text

                    match_object = Match(match_title, match_venue, match_link,
                                         series_object, category_type)
                    if not match_object.is_valid:
                        continue
                    if series_object is None:
                        # First valid match of a series not seen before:
                        # take the series from the match and remember it.
                        series_object = match_object.get_series_object()
                        self.series_data[series_title] = series_object
                    series_object.add_match(match_object)