def get(self, from_html=None):
    """Return one insult as plain text.

    This is a generator-based coroutine; drive it with ``yield from``.

    Args:
        from_html: Optional markup to parse. When ``None`` the markup is
            obtained with ``util.get_markup(self._insult_url)``; otherwise
            the given string is parsed and no fetch is performed.

    Returns:
        The text content of the first ``<div class="wrap">`` element, with
        leading whitespace removed (trailing whitespace is kept).

    Raises:
        AttributeError: If the markup contains no ``<div class="wrap">``.
    """
    markup = from_html
    if markup is None:
        markup = yield from util.get_markup(self._insult_url)

    soup = bs4.BeautifulSoup(markup, 'html.parser')
    wrapper = soup.find('div', class_='wrap')
    insult_text = wrapper.get_text()
    return insult_text.lstrip()
def get_bestoftwitchchat_pasta(self, from_html=None):
    """Return one copypasta from thebestoftwitch.com as plain text.

    This is a generator-based coroutine; drive it with ``yield from``.

    Args:
        from_html: Optional feed markup to parse. When ``None`` a random
            post index is chosen and the one-entry summary feed for that
            index is fetched with ``util.get_markup``; otherwise the given
            string is used and no random index is drawn or fetch made.

    Returns:
        The first capture group of ``self._bestoftwitchchat_regex`` in the
        markup, with HTML entities decoded and surrounding whitespace
        removed.

    Raises:
        AttributeError: If ``self._bestoftwitchchat_regex`` does not match
            the markup (``search`` returns ``None``).
    """
    if from_html is None:
        # The index and URL are only needed when a request is really made.
        index = random.randint(1, 1104)  # TODO: programmatically discover max index
        url = 'http://www.thebestoftwitch.com/feeds/posts/summary?start-index={}&max-results=1'.format(index)
        text = yield from util.get_markup(url)
    else:
        text = from_html

    raw_pasta = self._bestoftwitchchat_regex.search(text)
    pasta = raw_pasta.group(1)
    # Decode entities *before* stripping: whitespace written as entities
    # (e.g. "&#10;" or "&nbsp;") only becomes strippable once unescaped.
    return html.unescape(pasta).strip()
def get_oneliner(self, from_html=None):
    """Return a one-liner joke from randomjoke.com as plain text.

    This is a generator-based coroutine; drive it with ``yield from``.

    Args:
        from_html: Optional page markup to parse. When ``None`` the page is
            fetched with ``util.get_markup``; otherwise the given string is
            parsed and no fetch is performed.

    Returns:
        The first capture group of ``self._joke_regex`` within the text of
        the seventh ``<p>`` element, with surrounding whitespace removed.

    Raises:
        IndexError: If the page has fewer than seven ``<p>`` elements.
        AttributeError: If ``self._joke_regex`` does not match that text.
    """
    if from_html is not None:
        page = from_html
    else:
        source = 'http://www.randomjoke.com/topic/oneliners.php'
        page = yield from util.get_markup(source)

    paragraphs = bs4.BeautifulSoup(page, 'html.parser').find_all('p')
    # The joke is taken from a fixed position: the seventh paragraph.
    joke_paragraph = paragraphs[6].get_text()
    found = self._joke_regex.search(joke_paragraph)
    joke = found.group(1)
    return joke.strip()
def get_twitchquotes_pasta(self, from_html=None):
    """Return a random copypasta from twitchquotes.com as plain text.

    This is a generator-based coroutine; drive it with ``yield from``.

    Args:
        from_html: Optional page markup to parse. When ``None`` the page is
            fetched with ``util.get_markup``; otherwise the given string is
            parsed and no fetch is performed.

    Returns:
        The ``.string`` of the ``<div class="show_quote_text_area">``
        element, stripped of surrounding whitespace and double quotes. If
        that element is absent, the ``<span id="quote_content_">`` element
        is used instead, and every space in its text is replaced by a
        newline before stripping.

    Raises:
        AttributeError: If neither element is found, or the chosen
            element's ``.string`` is ``None``.
    """
    page = from_html
    if page is None:
        page = yield from util.get_markup('http://www.twitchquotes.com/random')

    # Substitute each regex match with its first capture group before parsing.
    cleaned = self._twitchquotes_regex.sub(r'\1', page)
    soup = bs4.BeautifulSoup(cleaned, 'html.parser')
    trimmed_chars = string.whitespace + "\""

    quote_area = soup.find('div', class_='show_quote_text_area')
    if quote_area is not None:
        return quote_area.string.strip(trimmed_chars)

    # Fallback layout: the quote is held in a span rather than the div.
    # NOTE(review): replacing plain spaces with newlines looks unusual;
    # confirm the intended character against the original file.
    fallback = soup.find('span', id="quote_content_")
    return fallback.string.replace(' ', '\n').strip(trimmed_chars)