def _get_matches_info(self, match_elements, championship_urls):
    """
    Builds Match objects out of the given link elements

    An element is skipped when it went stale while its href was read,
    when its url (the href with the base url prefix cut off) is listed in
    championship_urls, when its class attribute is not exactly 'link',
    when the url ends with '-Special-bets/', or when its date text cannot
    be parsed.

    :param match_elements: elements to inspect (presumably selenium
        web elements - confirm against the caller)
    :param championship_urls: urls, relative to the base url, that must
        not be treated as matches
    :return: matches created from the accepted elements
    :rtype: list
    """
    prefix_length = len(OneXBetScraper._BASE_URL)
    found_matches = []

    # iterate over a snapshot of the elements
    for element in list(match_elements):
        try:
            relative_url = element.get_attribute('href')[prefix_length:]
        except StaleElementReferenceException:
            print('Caught StaleElementReferenceException')
            continue

        # guard clauses, checked in the same order as a chained `or`
        if relative_url in championship_urls:
            continue
        if element.get_attribute('class') != 'link':
            continue
        if relative_url.endswith('-Special-bets/'):
            continue

        title_element = element.find_element_by_class_name('gname')
        title = MatchTitle.from_str(title_element.text)

        date_text = element.find_element_by_class_name('date').text
        try:
            starts_at = DateTime.from_1xbet_str(date_text)
        except ValueError:
            # unparsable date: report the url and move on
            print(relative_url)
            continue

        found_matches.append(
            Match(title, self._BASE_URL + relative_url, starts_at, self))

    return found_matches
def _get_match_title(tag):
    """
    Scrapes the match title found in the tag

    Team names are taken from the nodes right before and right after the
    first <br> inside the descendant that has class 'l'. The second name
    is left out when that node is missing or its text is empty.

    :param tag: one of the tags in the parsed document created from the
        championship url's html
    :type tag: BeautifulSoup.Tag
    :return: match title built from one or two team names
    :rtype: MatchTitle
    """
    line_break = tag.find(class_='l').find('br')

    def as_text(node):
        # plain strings are used as they are, anything else via its .text
        return node if isinstance(node, str) else node.text

    names = [as_text(line_break.previous_sibling)]

    following = line_break.next_sibling
    if following is not None:
        following = as_text(following)
    if following:
        names.append(following)

    return MatchTitle(names)
def _get_bets_from_url(self, match_url):
    """
    Opens the given match url and scrapes team names, date and bets

    :param match_url: any match url on the website
    :type match_url: str
    :return: match holding the title, the url, the lower-cased date text
        (empty string when the date could not be read) and the bets list
        handed to _parse_marketblocks
    :rtype: Match
    """
    self.renderer.get(match_url)
    time.sleep(2)  # fixed pause after navigation

    container = self.renderer.wait.until(
        EC.presence_of_element_located(
            (By.CLASS_NAME, 'Match__container___fpI_d')))

    team_names = []
    for competitor in container.find_elements_by_class_name(
            '__app-PromoMatchBody-competitor-name'):
        team_names.append(competitor.text)

    try:
        date_element = container.find_element_by_class_name(
            'dateTime__date___2QS99')
        match_date = date_element.text.lower()
    except Exception:
        # best effort: any failure while reading the date yields ''
        match_date = ''

    scraped_match = Match(MatchTitle(team_names), match_url, match_date)

    # presumably _parse_marketblocks fills this list in place - confirm
    collected_bets = []
    self._parse_marketblocks(collected_bets, match_url)
    scraped_match.bets = collected_bets
    return scraped_match
def _get_bets_from_url(self, match_url):
    """
    Opens the given match url and scrapes date, team names and bets

    After the basic info is read, the 'slick-block' element is waited
    for and clicked before the market blocks are parsed.

    :param match_url: url of the match page to open
    :type match_url: str
    :return: match holding the title, the url, the formatted date and
        the bets list handed to _parse_marketblocks
    :rtype: Match
    """
    self.renderer.get(match_url)

    header = self.renderer.wait.until(
        EC.presence_of_element_located(
            (By.CLASS_NAME, 'sticky-inner-wrapper')))

    raw_date = header.find_element_by_class_name('event--date--1').text
    match_date = self._format_date(raw_date.lower())

    name_element = header.find_element_by_class_name('event--name')
    team_names = [
        span.text for span in name_element.find_elements_by_tag_name('span')
    ]
    scraped_match = Match(MatchTitle(team_names), match_url, match_date, self)

    # wait for the element to be present first, then to be clickable
    slick_locator = (By.CLASS_NAME, 'slick-block')
    self.renderer.wait.until(EC.presence_of_element_located(slick_locator))
    clickable = self.renderer.wait.until(
        EC.element_to_be_clickable(slick_locator))
    try:
        self.renderer.click(clickable)
    except Exception:
        # transliterated Russian, roughly "cannot click on all";
        # a failed click is reported and scraping goes on
        print('ne mogu nazhat na all')
    time.sleep(0.5)

    # presumably _parse_marketblocks fills this list in place - confirm
    collected_bets = []
    self._parse_marketblocks(collected_bets, match_url)
    scraped_match.bets = collected_bets
    return scraped_match
def _get_match_basic_data(self, match):
    """
    Reads date, url and team names of a match from its element

    :param match: element that contains a 'date' element, a
        'member-link' element and the team name spans
    :return: match without bets, built from the scraped values
    :rtype: Match
    """
    date_text = match.find_element_by_class_name('date').text
    starts_at = DateTime.from_marathon_str(date_text)

    link = match.find_element_by_class_name('member-link')
    url = link.get_attribute('href')

    # only spans with a truthy 'data-member-link' attribute are team names
    team_names = []
    for span in match.find_elements_by_tag_name('span'):
        if span.get_attribute('data-member-link'):
            team_names.append(span.text)

    return Match(MatchTitle(team_names), url, starts_at, self)
def _get_match_basic_data(self, event):
    """
    Reads date, title and url of a match from its event element

    The url is taken from the renderer after the event's 'event--more'
    button has been clicked.

    :param event: element that contains the 'event--date', 'event--time',
        'long--name' and 'event--more' elements
    :return: match without bets, built from the scraped values
    :rtype: Match
    """
    date_part = event.find_element_by_class_name('event--date').text
    time_part = event.find_element_by_class_name('event--time').text
    # both texts are joined without a separator before parsing
    starts_at = DateTime.from_favorit_str(date_part + time_part)

    title_text = event.find_element_by_class_name('long--name').text.lower()

    more_button = event.find_element_by_class_name('event--more')
    self.renderer.click(more_button)
    url = self.renderer.current_url

    team_names = title_text.split(' - ')
    return Match(MatchTitle(team_names), url, starts_at, self)
def _get_live_match_basic_data(self, event):
    """
    Reads team names and url of a live match from its event element

    Every '—' character is removed from the team names. The url is taken
    from the renderer after the event has been clicked; renderer.back()
    is called afterwards.

    :param event: element that contains the 'member' elements with the
        team names
    :return: match with no date and no bets
    :rtype: Match
    """
    # Clean all names in a single pass. Removing from and appending to
    # the list while iterating over it skips the element that follows a
    # removed one (so it may keep its dash) and moves cleaned names to
    # the end of the list; building a new list avoids both problems.
    teams = [
        el.text.replace('—', '')
        for el in event.find_elements_by_class_name('member')
    ]

    self.renderer.click(event)
    url = self.renderer.current_url
    self.renderer.back()
    return Match(MatchTitle(teams), url, None, self)
def from_dict(cls, match_dict):
    """
    Creates an instance from its dictionary form

    Only the first item of the dictionary is used. Its key is expected
    to look like '<https url> <iso date time>: <title>'; when it does not,
    url, date time and title stay None. Its value is a sequence whose
    first element maps bet titles to odds.

    :param match_dict: dictionary form of a match
    :type match_dict: dict
    :return: instance built as cls(title, url, date_time, None, bets)
    """
    first_key = list(match_dict)[0]

    parsed = re.search(r'^(https://.+?) (.+?): (.+?)$', first_key)
    if parsed:
        url = parsed.group(1)
        date_time = DateTime.fromisoformat(parsed.group(2))
        title = MatchTitle.from_str(parsed.group(3))
    else:
        url = date_time = title = None

    odds_by_title = list(match_dict.values())[0][0]
    bets = []
    for bet_title, odds in odds_by_title.items():
        bets.append(Bet.from_dict({bet_title: odds}))

    return cls(title, url, date_time, None, bets)
def get_matches_info_sport(self, sport_name):
    """
    Collects the non-live matches of every tournament of a sport

    Each tournament element returned by get_tournaments is clicked, its
    event rows are scraped, and it is clicked once more afterwards.
    Rows carrying the live marker, and rows that went stale while the
    marker was looked up, are skipped.

    :param sport_name: name passed to get_tournaments and to Sport
    :return: sport holding all collected matches
    :rtype: Sport
    """
    collected = []

    for tournament in self.get_tournaments(sport_name):
        try:
            self.renderer.click(tournament)
        except StaleElementReferenceException:
            continue

        time.sleep(2)
        rows = self.renderer.find_elements_by_class_name(
            'sportEventRow__body___3Ywcg')
        time.sleep(2)

        for row in rows:
            try:
                row.find_element_by_class_name(
                    'matchDateTime__isLive___8f4IP')
            except NoSuchElementException:
                pass  # no live marker on this row: keep processing it
            except StaleElementReferenceException:
                continue
            else:
                continue  # match is live

            date_text = row.find_element_by_class_name(
                'matchDateTime__date___2Hw-c').text
            title_elements = row.find_elements_by_class_name(
                '__app-LogoTitle-wrapper')
            team_names = [element.text for element in title_elements]
            # the match url is the href of the link in the first title element
            anchor = title_elements[0].find_element_by_tag_name('a')
            url = anchor.get_attribute('href')
            starts_at = DateTime.from_ggbet_str(date_text)
            collected.append(
                Match(MatchTitle(team_names), url, starts_at, self))

        # second click on the same tournament element
        # (presumably collapses it again - confirm)
        self.renderer.click(tournament)

    print(len(collected))
    return Sport(sport_name, collected)
while True: s = SequenceMatcher(None, first_team, second_team) substring = s.find_longest_match(0, len(first_team), 0, len(second_team)) if substring.size < min(3, min_initial_length): break substrings_total_length += substring.size # print(first_team[substring.a:substring.b]) first_team = first_team[:substring.a] + first_team[substring.a + substring.size:] second_team = second_team[:substring. b] + second_team[substring.b + substring.size:] similarity = substrings_total_length / min_initial_length + \ (substrings_total_length - max_initial_length) / (10 * max_initial_length) return similarity if __name__ == '__main__': certainty = 0.5 comparator = MatchComparator() m1 = Match(MatchTitle(['jd gaming', 'team we']), '123', DateTime(2020, 12, 26, 12, 30), 1, []) m2 = Match(MatchTitle(['jd', 'we']), '321', DateTime(2020, 12, 26, 13), 2, []) similarity = comparator.calculate_matches_similarity(m1, m2, certainty) print(similarity) print(comparator.similar(m1, m2, certainty)) print(comparator.similarities)
class GGBetScraper: def __new__(cls): if not hasattr(cls, 'instance'): cls.instance = super(GGBetScraper, cls).__new__(cls) return cls.instance def get_name(self): return '1xbet' if __name__ == '__main__': grouper = CSGOForkGrouper() csgo = Sport('csgo', [ Match( MatchTitle(['Top Esports', 'Suning Gaming']), 'https://1x-bet.com/en/line/Esports/1309773-League-of-Legends-LPL-Summer-Playoffs/81469989-Top-Esports-Suning-Gaming/', DateTime(2020, 8, 9, 19, 0, 0), OneXBetScraper(), []), Match(MatchTitle(['Top', 'Sunning']), 'https://www.parimatch.com/en/sport/kibersport/liga-legend-lpl', DateTime(2020, 8, 9, 19, 0, 0), ParimatchScraper(), []), Match( MatchTitle(['Top Esports', 'SN Gaming']), 'https://www.favorit.com.ua/en/bets/#event=27802672&tours=182350,776347,776418,792747,977780,1011803,1037535,1061879,1258082,1265594,1293917,1618224,1713907,2270463', DateTime(2020, 8, 9, 19, 0, 0), FavoritScraper(), []), Match( MatchTitle(['Suning Gaming', 'Top Esports']), 'https://www.marathonbet.com/en/betting/e-Sports/LoL/LPL+Summer/Main+Event/Best+of+5+maps/Suning+Gaming+vs+Top+Esports+-+9994779', DateTime(2020, 8, 9, 19, 0, 0), MarathonScraper(), []), Match( MatchTitle(['Top Esports', 'Suning Gaming']),
def get_live_match_basic_data(self, event):
    """
    Reads team names and url of a live match from its event element

    The event is clicked first; the team names are then read from its
    span elements and the url from the renderer.

    :param event: element of the live match
    :return: match with an empty string as date and no bets
    :rtype: Match
    """
    self.renderer.click(event)

    team_names = []
    for span in event.find_elements_by_tag_name('span'):
        team_names.append(span.text)

    page_url = self.renderer.current_url
    return Match(MatchTitle(team_names), page_url, '', self)