Ejemplo n.º 1
0
    def getting_players_info(self, nodes):
        """Collect player data for every team node in *nodes*.

        Fetches each team's roster page and converts every table row into
        a player dict.  Fixes over the previous version: an unreachable
        team page now skips just that node instead of aborting the whole
        run; every parsed player is appended to ``result`` (previously the
        unused accumulator meant only the last player of the first node
        was returned); an unavailable player page yields
        ``nationality = None`` instead of the ``Exception`` class object.

        Returns a list of player dicts.  Parse errors per node are printed
        and that node is skipped.
        """
        result = []
        for node in nodes:
            request = parsing_functions.get_request(node.url)
            if request is None:
                continue  # skip this team instead of aborting all nodes
            try:
                soup = bs(request.text, 'html.parser')
                rows = soup.find(
                    'div', 'js-tournament-filter-content').tbody.findAll('tr')
                for item in rows:
                    # Column labels are the site's Russian data-labels:
                    # Амплуа=role, ДР=birth date, Рост=height, Вес=weight.
                    name = item.find(
                        attrs={'class': 'table-item__name'}).text.strip()
                    role = item.find(
                        attrs={'data-label': 'Амплуа'}).text.strip()
                    birth = item.find(
                        attrs={'data-label': 'ДР'}).text.strip()
                    growth = item.find(
                        attrs={'data-label': 'Рост'}).text.strip()
                    weight = item.find(
                        attrs={'data-label': 'Вес'}).text.strip()

                    player_url = self.site + item.find('a',
                                                       'table-item')['href']
                    # Separate variable: do not clobber the team-page request.
                    player_request = parsing_functions.get_request(player_url)
                    nationality = None
                    if player_request is not None:
                        nationality = bs(
                            player_request.text, 'html.parser').find(
                                'div',
                                text='Гражданство:').next_sibling.strip()

                    result.append({
                        'id': None,
                        'team_id': node.data['id'],
                        'name': name,
                        'nationality': nationality,
                        'role': role,
                        'birth': birth,
                        'growth': growth,
                        'weight': weight
                    })
            except Exception as e:
                print('Ошибка парсинга игроков')
                print(e)

        return result
Ejemplo n.º 2
0
    def parse_teams(self, nodes):
        """Parse the team list page of every tournament node in *nodes*.

        Each parsed team becomes a child ``Node`` of its tournament node
        and ``self.parsed_teams`` is incremented.  Fix: an unreachable
        page now skips just that tournament (``continue``) instead of
        silently aborting the remaining nodes (``return``).  Parse errors
        are printed per node.
        """
        for node in nodes:
            request = parsing_functions.get_request(node.url + 'teams')
            if request is None:
                continue  # fix: keep processing the other tournaments
            try:
                soup = bs(request.text, 'html.parser')
                for item in soup.find_all('a', 'teams-item__link'):
                    name = item.find(
                        'div', 'teams-item__name').get_text().strip()
                    city = item.find(
                        'div', 'teams-item__country').get_text().strip()
                    data = {
                        'id': None,
                        'tournament_id': node.data['id'],
                        'name': name,
                        'city': city
                    }
                    team = Node(self.ParsingTypes.team, data)
                    # The listing links to the results page; the roster
                    # lives under the same id at players/.
                    team.url = self.site + item['href'].replace(
                        'result/', 'players/')
                    team.parents.append(node)

                    node.add_node(team)

                    self.parsed_teams += 1
            except Exception as e:
                print('Ошибка парсинга команд')
                print(e)
    def GetMatchLinks(self, url):
        """Return absolute URLs of all match pages linked on *url*.

        Raises ``Exception`` on any failure.  Fix: the original error is
        now chained as ``__cause__`` instead of discarding its traceback.
        """
        try:
            Request = parsing_functions.get_request(url)
            Soup = Bs(Request.text, 'html.parser')
            Items = Soup.find_all('td', 'stat-results__link')
            return [self.Site + item.a['href'] for item in Items]
        except Exception as Err:
            # Preserve the original traceback for debugging.
            raise Exception(Err) from Err
Ejemplo n.º 4
0
    def parse_matches(self, url, parents):
        """Parse the match calendar at *url* into match nodes.

        Every parsed match ``Node`` is attached to each node in *parents*
        and ``self.parsed_matches`` is incremented.  Returns None; a page
        fetch failure returns early, a parse failure is printed.
        Fixes: the main score string is split only once, the unused
        ``match_nodes`` accumulator and unused ``tour`` column read were
        removed, and the redundant pre-initialisation of the score
        variables was dropped.
        """
        request = parsing_functions.get_request(url)
        if request is None:
            return
        try:
            soup = bs4.BeautifulSoup(request.text, 'html.parser')
            rows = soup.find('div', 'sport__table__tstat').find_all('tr')
            rows.pop(0)  # first <tr> is the table header
            for html_row in rows:
                html_columns = html_row.find_all('td',
                                                 'sport__table__tstat__td')
                match_date = html_columns[1].get_text().strip()

                teams = html_columns[3].find_all('a')
                home = teams[0].get_text().strip()
                guest = teams[1].get_text().strip()

                # e.g. "2:1" or "1:1 4:3" (second pair = penalty shoot-out)
                match_result = html_columns[4].a.get_text().strip().split(' ')

                main_score = match_result[0].split(':')
                home_result, guest_result = main_score[0], main_score[1]

                penalty_home, penalty_guest = None, None
                if len(match_result) == 2 and re.match(
                        r"\d:\d", match_result[1]) is not None:  # penalties
                    shootout = match_result[1].split(':')
                    penalty_home, penalty_guest = shootout[0], shootout[1]

                data = {
                    'id': 0,
                    'home_team_id': 0,
                    'guest_team_id': 0,
                    'match_date': match_date,
                    'home_team': home,
                    'guest_team': guest,
                    'home_score': home_result,
                    'guest_score': guest_result,
                    'home_penalty_score': penalty_home,
                    'guest_penalty_score': penalty_guest
                }

                node = Node(self.ParsingTypes.match, data)

                node.parents.extend(parents)
                for parent in parents:
                    parent.add_player_node(node)

                self.parsed_matches += 1
        except Exception as e:
            print('Ошибка парсинга матчей')
            print(e)
 def GetSeasons(url, proxy):
     """Return the list of season option values from a tournament page.

     Fetches *url* through *proxy* and reads the ``option`` values of the
     season selector.  Returns None when the selector is missing or has
     no options.  Raises ``Exception`` on any failure.  Fix: the original
     error is chained instead of discarding its traceback.
     """
     try:
         Request = parsing_functions.get_request(url, proxy)
         Soup = Bs(Request.text, 'html.parser')
         ContentTag = Soup.find('div', 'js-tournament-header-year')
         if ContentTag:
             Items = ContentTag.find_all('option')
             if Items:
                 return [item['value'] for item in Items]
     except Exception as Err:
         raise Exception(Err) from Err
Ejemplo n.º 6
0
    def parse_tournaments(self, url):
        """Parse the tournament list page at *url*.

        Each tournament becomes a child ``Node`` of ``self.root`` carrying
        its name, country and start/end dates, and
        ``self.parsed_tournaments`` is incremented per tournament.
        Fixes: ``find_all`` returns a list (never None), so the emptiness
        guard now uses truthiness; the large block of commented-out
        URL-building code was removed.
        """
        request = parsing_functions.get_request(url)
        if request is None:
            return
        try:
            soup = bs(request.text, 'html.parser')
            content = soup.find_all('div', 'mc-sport-tournament-list__item')
            if not content:
                return
            for item in content:
                country = item.find('div', 'item__title').get_text().strip()
                for html_link in item.find_all(
                        attrs={"data-type": "tournament"}):
                    t_name = html_link['data-title'].strip()

                    # Drop the decorative separator so it does not pollute
                    # the following date lookup.
                    html_link.find('span', 'separator').extract()

                    t_dates_html = html_link.findNext(
                        'div', 'item__dates _dates').findAll('span')
                    t_start_date = datetime.strptime(
                        t_dates_html[0].get_text().strip(), "%d.%m.%Y")
                    t_end_date = datetime.strptime(
                        t_dates_html[1].get_text().strip(), "%d.%m.%Y")

                    data = {
                        'id': None,
                        'name': t_name,
                        'country': country,
                        'start_date': t_start_date,
                        'end_date': t_end_date
                    }

                    node = Node(self.ParsingTypes.tournament, data)
                    node.url = self.site + html_link['href']
                    self.root.add_node(node)
                    node.parents.append(self.root)

                    self.parsed_tournaments += 1

        except Exception as e:
            print('Ошибка парсинга турниров')
            print(e)
    def GetTournamentLinks(self, url):
        """Return ``[calendar_link, match_links]`` pairs for each tournament.

        For every tournament anchor on *url* the calendar URL is built and
        its match links fetched via ``GetMatchLinks``.  Raises
        ``Exception`` on failure.  Fix: the original error is chained
        instead of discarding its traceback.
        """
        try:
            Request = parsing_functions.get_request(url)
            Soup = Bs(Request.text, 'html.parser')

            Content = Soup.find('div', 'mc-sport-tournament-list')
            Links = []
            for item in Content.find_all('a'):
                TournamentLink = self.Site + item['href'] + 'calendar'
                Links.append(
                    [TournamentLink, self.GetMatchLinks(TournamentLink)])
            return Links
        except Exception as Err:
            raise Exception(Err) from Err
 def ParseLinks(self, season=None):
     """Return tournament and match links for the given *season*.

     Looks up *season* among the season selector's option values on the
     start page.  Returns the link list when found, an empty list when a
     falsy *season* value somehow matches, and None when the selector or
     the season is absent.  Raises ``Exception`` on failure.  Fix: the
     original error is chained instead of discarding its traceback.
     """
     try:
         self.log.info("Getting links for parsing.")
         Request = parsing_functions.get_request(self.StartUrl)
         Soup = Bs(Request.text, 'html.parser')
         ContentTag = Soup.find('div', 'js-tournament-header-year')
         if ContentTag:
             Items = [item['data-href']
                      for item in ContentTag.find_all('option')
                      if item['value'] == season]
             if Items:
                 Links = []
                 if season:
                     Links = self.GetTournamentLinks(self.Site + Items[0])
                 return Links
     except Exception as Err:
         raise Exception(f"Error parsing season links: {Err}") from Err
Ejemplo n.º 9
0
def parse_season(url):
    """Return ``[{'url': ..., 'season': ...}, ...]`` for each season option.

    Fetches *url* and reads the championat.com season selector; the
    season label is the first part of text like "2019/2020".  Returns
    None when the page is unreachable or parsing fails (the error is
    printed).  Fix: ``find_all`` returns a list (never None), so the
    emptiness guard now uses truthiness.
    """
    try:
        request = parsing_functions.get_request(url)
        if request is None:
            return None

        soup = bs4.BeautifulSoup(request.text, 'html.parser')
        content = soup.find('div',
                            'js-tournament-header-year').find_all('option')
        if not content:
            return None
        season_links = []
        for item in content:
            season_url = "https://www.championat.com" + item['data-href']
            season = item.get_text().strip().split("/")[0]
            season_links.append({'url': season_url, 'season': season})
        return season_links
    except Exception as e:
        print(e)
        return None
Ejemplo n.º 10
0
    def parse_seasons(self):
        """Parse all seasons of ``self.url`` into ``SeasonNode`` children.

        Each ``option`` of the season selector yields a ``SeasonNode``
        (year parsed from text like "2019/2020") attached to
        ``self.root``.  Parse errors are printed.  Fix: ``find_all``
        returns a list (never None), so the emptiness guard now uses
        truthiness.
        """
        request = parsing_functions.get_request(self.url)
        if request is None:
            return
        try:
            soup = bs4.BeautifulSoup(request.text, 'html.parser')
            content = soup.find('div',
                                'js-tournament-header-year').find_all('option')
            if not content:
                return

            for item in content:
                season_url = self.site + item['data-href']
                # "2019/2020" -> "2019"
                season_year = item.get_text().strip().split("/")[0]
                season = datetime.strptime(season_year, '%Y')
                self.root.add_node(SeasonNode(season, season_url))
        except Exception as e:
            print('Ошибка парсинга сезонов')
            print(e)
    def ParseMatches(self, matchLink: str):
        """Parse a single match page into a node tree.

        Builds a tournament ``Root`` node with one match child; teams,
        lineups, goals, punishments, missed penalties and shoot-out
        penalties are attached as descendants.  Returns the root ``Node``,
        or ``None`` when the teams cannot be parsed; on any other failure
        the exception is logged via ``self.log`` and ``None`` is returned
        implicitly.
        """
        Url = ''
        try:
            Root = Node(key=ParsingTypes.tournament)
            Url = matchLink
            Request = parsing_functions.get_request(Url)
            Soup = Bs(Request.text, 'html.parser')

            # Fill the tournament data from the match page header.
            if not Root.data: Root.data = {k: v for k, v in self.GetTournament(Soup).items()}
            MatchNode = Node(key=ParsingTypes.match)

            # Stadium/referee live in the extra-info section; the Russian
            # labels mean 'Стадион:' = stadium, 'Главный судья:' = referee.
            ExtraInfoTag = Soup.find('div', 'match-info__extra')
            Stadium: str = GetStadium(ExtraInfoTag.find(text=re.compile('Стадион:')))
            Referee: str = GetReferee(ExtraInfoTag.find(text=re.compile('Главный судья:')))

            StatTag = Soup.find('div', attrs={'data-type': 'stats'})
            Stats = GetMatchStat(StatTag, StatAssociations)

            # Teams[0]/Teams[1] pair up with Stats['Home']/Stats['Guest'],
            # so index 0 is presumably the home side — consistent below.
            Lineups = GetLineups(Soup)
            Teams = GetTeams(Soup.find('div', 'match-info__scoreboard'))
            if Teams is None: return None
            Teams[0].set_child(Stats['Home'])
            Teams[1].set_child(Stats['Guest'])
            MatchNode.AddChildren(Teams)
            [Teams[0].set_child(lineup) for lineup in Lineups[0]]
            [Teams[1].set_child(lineup) for lineup in Lineups[1]]

            # Attach each goal to the scorer's lineup node; when an
            # assistant is also found, the same data becomes an Assist node.
            for goal in GetGoalsStat(StatTag):
                Player = MatchNode.search_node(goal['Kicker'], 'Name', ParsingTypes.Lineup)
                Assistant = MatchNode.search_node(goal['Assistant'], 'Name', ParsingTypes.Lineup)

                if Player:
                    Goal = Node(key=ParsingTypes.Goal, data={k: v for k, v in goal.items()})
                    Player.set_child(Goal)

                    if Assistant:
                        Assist = Node(key=ParsingTypes.Assist, data={k: v for k, v in goal.items()})
                        Assistant.set_child(Assist)

            # Cards/punishments, attached to the punished player's lineup;
            # entries whose player is not found in the lineups are dropped.
            for punish in GetPunishmentsStat(StatTag):
                PunishNode = Node(key=ParsingTypes.Punishment, data={k: v for k, v in punish.items()})
                Player = MatchNode.search_node(punish['Player'], 'Name', ParsingTypes.Lineup)
                if Player: Player.set_child(PunishNode)

            # Missed in-game penalties, same lookup-by-name scheme.
            for miss in GetMissPenaltiesStat(StatTag):
                MissNode = Node(key=ParsingTypes.MissPenalty, data={k: v for k, v in miss.items()})
                Player = MatchNode.search_node(miss['Player'], 'Name', ParsingTypes.Lineup)
                if Player: Player.set_child(MissNode)

            # Shoot-out penalties; the sample below documents the dict
            # shape GetPenalties is expected to yield (lookup is by Id).
            # Penalties = [{'PlayerId': 974, 'Result': 'Гол', 'Score':'1:0'},
            #              {'PlayerId': 74222, 'Result': 'Мимо', 'Score': '1:0'},
            #              {'PlayerId': 823, 'Result': 'Гол', 'Score': '1:1'},
            #              {'PlayerId': 209, 'Result': 'Гол', 'Score': '1:2'}]
            for penalty in GetPenalties(StatTag):
                Penalty = Node(key=ParsingTypes.Penalty, data={k: v for k, v in penalty.items()})
                Lineup = MatchNode.search_node(penalty['PlayerId'], 'Id', ParsingTypes.Lineup)
                if Lineup: Lineup.set_child(Penalty)

            # Scoreboard totals: regular score, shoot-out score and
            # technical (forfeit) score as [home, guest] pairs.
            MainScore: list = GetMainScore(Soup.find('div', 'match-info__scoreboard'))
            PenaltyScore: list = GetPenaltyScore(Soup.find('div', 'match-info__count-extra'))
            TechScore: list = GetTechnicalScore(Soup.find('div', 'match-info__count-extra'))

            MatchNode.data = {
                'Url': Url,
                'Stadium': Stadium,
                'Referee': Referee,
                'Date': GetMatchDate(Soup.find('div', 'match-info__date')),
                'Stage': GetMatchStage(Soup.find('div', 'match-info__stage')),
                'IsExtra': IsExtraTime(Soup.find('div', 'match-info__count-extra')),
                'HomeScore': MainScore[0],
                'GuestScore': MainScore[1],
                'HomePenaltyScore': PenaltyScore[0],
                'GuestPenaltyScore': PenaltyScore[1],
                'HomeTechScore': TechScore[0],
                'GuestTechScore': TechScore[1]}
            Root.set_child(MatchNode)
            return Root
        except Exception as Err:
            self.log.exception(f"Error parsing match {Url}: {Err}")