def scan(self):
    forums = [
        'https://www.pathofexile.com/forum/view-forum/366/orderby/create-time',
        'https://www.pathofexile.com/forum/view-forum/419/orderby/create-time',
        'https://www.pathofexile.com/forum/view-forum/54/orderby/create-time'
    ]  # In order of importance. alert_level is a cutoff here.
    i = 0
    for forum in forums:
        i += 1
        if i > self.alert_level:
            break  # !cover
        soup = loader.soup(forum)
        table = soup.find(attrs={"class": 'viewForumTable'})
        elems = table.find('tbody').find_all('tr')
        elem = None
        for e in elems:
            # Skip to first non-sticky thread.
            if not e.find(attrs={'class': 'sticky'}):
                elem = e
                break
        ttl = elem.find(attrs={'class': 'title'})
        _title = ttl.text
        link = ttl.find('a')
        _url = 'https://www.pathofexile.com' + link["href"]
        if any(s.lower().strip() in _title.lower().strip() for s in self.ignore_terms.split(',')):
            continue  # !cover
        page = loader.soup(_url)
        dsc = page.find(attrs={'class': 'newsPost'})
        if not dsc:
            dsc = page.find(attrs={"class": 'content-container'}).find(attrs={'class': 'content'})
        _desc = dsc.getText('\n')
        yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#af6025")

def scan(self): soup = loader.soup("https://playoverwatch.com/en-us/news/patch-notes/pc/") bod = soup.find(attrs={'class': 'patch-notes-patch'}) link = soup.find(attrs={'class': 'PatchNotesSideNav'}).find('a') _title = link.find('h3').text _url = 'https://playoverwatch.com/en-us/game/patch-notes/pc/' + link['href'] # First link in sidebar. _desc = bod.text yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#f99e1a")
def scan(self): soup = loader.soup("https://us.battle.net/forums/en/hearthstone/22814011/") table = soup.find(attrs={"class": 'Forum-ForumTopicList'}) elems = table.find_all(attrs={'class': 'ForumTopic'}) for elem in elems: dsc = elem.find(attrs={"class": 'ForumTopic-title'}) _url = 'https://us.battle.net' + elem["href"] _title = dsc.text _desc = dsc['data-tooltip-content'] yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#6785c2")
def scan(self): soup = loader.soup("https://na.leagueoflegends.com/en/news/game-updates/patch") elems = soup.find_all('article') for elem in elems: link = elem.parent img = elem.find('img') ttl = elem.find('h2') _url = 'https://na.leagueoflegends.com' + link["href"] _title = ttl.text _img = img['src'] _desc = 'Click here to read more!' yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, image=_img, color="#e6ac00")
def scan(self):
    dat = loader.json('https://www.rockstargames.com/newswire/get-posts.json?page=1&tag_id=591')
    posts = dat['posts']
    for a in posts:
        _title = loader.direct_soup(a['title']).get_text()
        _url = a['link']
        _desc = loader.direct_soup(a['blurb']).get_text()
        _image = None
        if 'preview_images_parsed' in a and 'featured' in a['preview_images_parsed']:
            _image = a['preview_images_parsed']['featured']['src']
        yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#588622", image=_image)

def scan(self): soup = loader.soup("https://www.huntshowdown.com/news/tagged/news") sect = soup.find(attrs={'class': 'news-feature'}) for elem in sect.find_all(attrs={"class": 'col'}): link = elem.find('a') img = elem.find('img') dsc = elem.find('p') ttl = elem.find('h3') _url = 'https://www.huntshowdown.com' + link["href"] _title = ttl.text _img = 'https://www.huntshowdown.com' + img['src'] _desc = dsc.text yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#cc0000")
def scan(self):
    soup = loader.soup("https://playoverwatch.com/en-us/news/patch-notes/pc/")
    bod = soup.find_all(attrs={'class': 'PatchNotes-section'})
    date = soup.find(attrs={'class': 'PatchNotes-patch'})['id'].replace(' ', '')
    _title = soup.find(attrs={'class': 'PatchNotes-patchTitle'}).text
    _url = 'https://playoverwatch.com/en-us/news/patch-notes#%s' % date
    # Use the second section when one exists; otherwise fall back to the first.
    _desc = bod[1].text if len(bod) > 1 else bod[0].text
    yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#f99e1a")

def scan(self): _title = "" _url = "" _desc = "" data = loader.soup("http://forums.factorio.com/viewforum.php?f=3") items = data.find_all("li", {"class": "announce"}) for item in items: _title = item.findChildren("a")[0].contents[0] if "Version" not in _title: continue _url = "https://forums.factorio.com" + item.findChild("a", {"class":"topictitle"})["href"][1:] _desc = self.get_description(_url) break yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#C97327")
def scan(self): soup = loader.soup( "https://www.epicgames.com/fortnite/en-US/patch-notes/" ) # Follow redirect to latest. _title = soup.find(attrs={'property': "og:title"})['content'] _desc = soup.find(attrs={'class': "patch-notes-text"}).get_text('\n') _img = soup.find(attrs={'property': "og:image"})['content'] _url = loader.get_redirect() yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, image=_img, color="#1c237a")
def scan(self): soup = loader.soup("https://www.pubg.com/category/patch-notes/") table = soup.find(attrs={"class": "l-gutters"}) elems = table.find_all(attrs={'class': 'l-gutters__item'}) for elem in elems: link = elem.find('a') img = elem.find('img') dsc = elem.find('p') # Yes, they actually spelled 'description' wrong. ttl = elem.find('h2') _url = 'https://playbattlegrounds.com' + link["href"] _title = ttl.text _img = img['src'] if img else None _desc = dsc.text + '...' yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, image=_img, color="#bf1866")
def scan(self): soup = loader.soup( "http://blog.counter-strike.net/index.php/category/updates/") elems = soup.find_all(attrs={'class': 'inner_post'}) for elem in elems: link = elem.find('a') _url = link["href"] _title = link.text _desc = elem.find('p', attrs={'class': None}).text yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#2f2217")
def scan(self): soup = loader.soup("https://us.diablo3.com/en/game/patch-notes/") latest = soup.find(attrs={'class': 'subpatches-nav'}) for link in latest.find_all('a'): url = 'https://us.diablo3.com' + link['href'] page = loader.soup(url) title = page.find(attrs={'class': 'subpatch-title'}) desc = page.find(attrs={'class': 'sub-patches'}) _title = title.get_text(" - ").strip().strip(' -') _desc = desc.get_text("\n") yield Update(game=self, update_name=_title, post_url=url, desc=_desc, color="#632004")
def scan(self): soup = loader.soup( "http://archive.deadbydaylight.com/posts/category/patch-notes/") elems = soup.find_all('article') for elem in elems: link = elem.find('a') dsc = elem.find_all('p')[1] _url = link["href"] _title = link['title'] _desc = dsc.text yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#6785c2")
def scan(self):
    soup = loader.json(
        'https://playhearthstone.com/en-us/api/blog/articleList/?page=1&pageSize=12&tagsList[]=patch')
    for upd in soup:
        title = upd['title']
        desc = upd['summary']
        img = 'https:' + upd['thumbnail']['url']
        url = upd['defaultUrl']
        yield Update(game=self, update_name=title, post_url=url, desc=desc, color="#6785c2", image=img)

def scan(self): data = loader.soup("https://www.seaofthieves.com/de/release-notes/") header = data.find("div", {"class": "page-header"}) _title = header.findChildren("p")[0].contents[0] _url = "https://www.seaofthieves.com/de/release-notes/" + _title updates = data.find( "span", text="Updates").findParent("h2").findParent("div").findNext( "div").findChild("div").findChild("ul").find_all("li") _desc = self.create_description_from_updates(updates) yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#419178")
def scan(self): soup = loader.soup("http://us.battle.net/wow/en/game/patch-notes/") for item in soup.find_all(attrs={'class': 'NewsBlog'}): link = item.find('a') url = 'http://us.battle.net' + link['href'] title = item.find(attrs={'class': 'NewsBlog-title'}) desc = item.find(attrs={'class': 'NewsBlog-desc'}) _title = title.get_text(" - ").strip().strip(' -') _desc = html.unescape( desc.get_text("\n").replace('\u200b', '').strip()) yield Update(game=self, update_name=_title, post_url=url, desc=_desc, color="#78ab60")
def scan(self): data = loader.json( "http://services.runescape.com/m=news/latestNews.json?cat=1" )['newsItems'] for p in data: _title = p['title'] _url = p['link'] _desc = p['summary'] _img = p['summaryImageLink'] yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color='#f8ca40', image=_img)
def scan(self): soup = loader.soup( "https://worldoftanks.com/en/content/docs/release_notes/") latest = soup.find(attrs={'class': 'article-wrapper'}) for box in latest.find_all(attrs={'class': 'spoiler'}): title = box.find('h2') desc = box.find(attrs={'class': 'spoiler_content'}) _title = title.get_text(" - ").strip().strip(' -') _desc = desc.get_text("\n") _url = 'https://worldoftanks.com/en/content/docs/release_notes/#%s' % urllib.parse.quote( _title) yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#632004")
def scan(self): soup = loader.soup("https://us.diablo3.com/en/game/patch-notes/") for article in soup.find_all(attrs={'class': 'article-wrapper'}): anchor = article.find('a') url = 'https://us.diablo3.com' + anchor['href'] title = article.find(attrs={'class': 'article-title'}) desc = article.find(attrs={'class': 'article-summary'}) img = 'https:' + article.find( 'meta', attrs={'itemprop': 'thumbnailUrl'})['content'] _title = title.get_text(" - ") _desc = desc.get_text("\n") yield Update(game=self, update_name=_title, post_url=url, desc=_desc, color="#632004", image=img)
def scan(self): soup = loader.soup("https://www.rocketleague.com/ajax/articles-results/?cat=7-5aa1f33-rqfqqm") # for a in soup.find_all('a'): # Multiple possible, but disabled for now. a = soup.find('a') _url = 'https://www.rocketleague.com' + a['href'] _title = a.text page = loader.soup(_url) desc = page.find(attrs={'class': ['article', 'page-content']}) _desc = '' for p in desc.find_all(['li', 'strong', 'h3']): txt = p.text.replace('\t', '') if 'h' in p.name: txt = '**%s**' % txt if 'li' in p.name: txt = '• %s' % txt _desc += txt + '\n' yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#af6025")
def scan(self): soup = loader.soup( "https://forums.warframe.com/forum/3-pc-update-build-notes/") table = soup.find(attrs={"class": 'cTopicList'}) if 'ERROR: The request could not be satisfied' in soup.prettify(): raise TemporarySiteException() elems = table.find_all('li', attrs={'class': 'ipsDataItem'}) for elem in elems: link = elem.find('a') _url = link["href"] _title = link.text _desc = 'Click to read more.' yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color='#C0C0C0')
def scan(self): soup = loader.soup("https://blog.battlerite.com/category/updates/") elems = soup.find_all('article', id=lambda x: x and 'post' in x.lower()) for elem in elems: link = elem.find('a') img = elem.find('img') dsc = elem.find(attrs={"class": 'entry-content'}) _url = link["href"] _title = link.text _img = img['src'] _desc = dsc.text yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, image=_img)
def scan(self): data = loader.json( "https://prod-tridionservice.ubisoft.com/live/v1/News/Latest?pageSize=6&pageIndex=0" "&language=en-US&templateId=tcm%3A152-76778-32&detailPageId=tcm%3A150-194572-64" "&keywordList=233416&useSeoFriendlyUrl=true")[ 'items'] # Returns XML for p in data: soup = loader.direct_soup(p['Content']) _img = soup.find('img')['src'] _title = soup.find('h3').text _desc = soup.find('strong').text _url = 'https://rainbow6.ubisoft.com/' + soup.find('a')['href'] yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color='#333740', image=_img)
def scan(self):
    encoded = loader.json(
        'https://news.blizzard.com/en-us/blog/list?pageNum=1&pageSize=30&community=heroes-of-the-storm')
    soup = loader.direct_soup(encoded['html'])
    elems = soup.find_all(attrs={'class': 'ArticleListItem'})
    for elem in elems:
        a = elem.find('a')
        dsc = elem.find(attrs={"class": 'ArticleListItem-description'})
        title = elem.find(attrs={'class': 'ArticleListItem-title'})
        _url = 'https://news.blizzard.com/' + a['href']
        _title = title.text
        _desc = dsc.text
        yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#632004")

def scan(self): soup = loader.soup( "https://na.leagueoflegends.com/en/news/game-updates/patch") elems = soup.find_all(attrs={"class": 'views-row'}) for elem in elems: link = elem.find('a') img = elem.find('img') dsc = elem.find(attrs={"class": 'field-type-text-long'}) ttl = elem.find('h4') _url = 'https://na.leagueoflegends.com' + link["href"] _title = ttl.text _img = 'https://na.leagueoflegends.com' + img['src'] _desc = dsc.text yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, image=_img, color="#e6ac00")
def scan(self):
    found = []  # Gotta dedupe these, because the devs post everything twice.
    soup = loader.soup("http://www.dota2.com/news/updates/")
    bod = soup.find(attrs={'id': 'mainLoop'})
    for p in bod.find_all("div", id=lambda i: i and "post-" in i):
        title = p.find(attrs={'class': 'entry-title'})
        link = title.find('a')
        desc = p.find(attrs={'class': 'entry-content'})
        _url = link['href']
        _title = title.text
        _desc = desc.get_text("\n\n")  # Converts <br> tags to newlines.
        if _url not in found:
            found.append(_url)
            yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#656565")

def scan(self): soup = loader.soup("https://heroesofthestorm.com/en-us/blog/") cont = soup.find(attrs={'class': 'news-index-section'}) for box in cont.find_all(attrs={'class': 'news-list__item'}): title = box.find(attrs={'class': 'news-list__item__title'}) desc = box.find(attrs={'class': 'news-list__item__description'}) link = box.find('a') _title = title.text _desc = desc.get_text("\n") _url = link['href'] if 'http' not in _url: _url = 'https://heroesofthestorm.com' + _url if not any(s in _title.lower() for s in ['hotfix', 'update', 'patch']): continue yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#632004")
def scan(self):
    dat = loader.json('https://www.rockstargames.com/newswire/get-posts.json?tags=716&page=1')
    posts = dat['posts']
    for a in posts:
        _title = a['title'].split('<')[0]
        _url = a['link']
        _desc = a['blurb'].split('<')[0].replace('\n\n', '\n').replace('\n\n', '\n')
        _image = None
        if 'preview_images_parsed' in a and 'featured' in a['preview_images_parsed']:
            _image = a['preview_images_parsed']['featured']['src']
        yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, color="#588622", image=_image)

def scan(self): _title = "" _url = "" _desc = "" _imgurl = "" data = loader.soup("https://playvalorant.com/en-us/news/") items = data.find_all( "div", {"class": "NewsArchive-module--newsCardWrapper--2OQiG"}) for item in items: _title = item.findChildren("h5")[0].contents[0] if "Patch Notes" not in _title: continue _url = "https://playvalorant.com" + item.findChild("a")["href"] _desc = item.findChildren( "p", {"class": "copy-02 NewsCard-module--description--3sFiD" })[0].contents[0] _imgurl = self.get_patchnote_highlight_picture(_url) break yield Update(game=self, update_name=_title, post_url=_url, desc=_desc, image=_imgurl, color="#FF4654")