def response(self, nick, args, kwargs):
    try:
        url = self.google.lucky(u'site:songmeanings.net ' + args[0])
    except NonRedirectResponse:
        self.log.warn('no url for query {0!r} found from google lucky'.format(args[0]))
        return u'{nick}: {error}'.format(error=self.error, **kwargs)
    try:
        soup = getsoup(url)
        try:
            title = strip_html(soup.find('a', 'pw_title').renderContents()).strip()
        except StandardError:
            title = 'Unknown artist/song, check parsing code!'
        text = soup.find('div', id='textblock')
    except StandardError:
        self.log.warn('unable to find textblock from url {0!r} (query: {1!r})'.format(url, args[0]))
        return u'{nick}: {error}'.format(error=self.error, **kwargs)
    try:
        lyrics = decode(text.renderContents(), 'utf-8')
        return u'\n'.join(['[{}]'.format(title)] + filter(
            None, [line.strip() for line in strip_html(lyrics).splitlines()]))
    except StandardError:
        self.log.exception('error parsing lyrics for query: {0!r}'.format(args[0]))
        return u'{nick}: {error}'.format(error=self.error, **kwargs)

def response(self, nick, args, kwargs):
    page = 1
    players = []
    while page:
        url = self.group_url + '?p=%d' % page
        soup = getsoup(url)
        next = soup.body.find('div', 'pageLinks').find(text=self.next_re)
        if next is None:
            page = None
        else:
            page = int(next.parent['href'].split('=', 1)[-1])
        for player in soup.body('div', attrs={'class': self.status_re}):
            name = strip_html(player.p.a.renderContents())
            game = player.find('span', 'linkFriend_in-game')
            if game is None:
                if settings.STEAM_SHOW_ONLINE:
                    status = 'Online'
                else:
                    status = None
            else:
                status = strip_html(game.renderContents()).split('\n')[-1].replace(' - Join', '')
            if status:
                players.append('%s: %s' % (name, status))
    if players:
        return u'\n'.join(players)
    return u'No one online.'

def rate_rt_audience(self, name):
    """Audience Rating from rotten tomatoes"""
    soup = getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
    ourname = self.normalize(name)
    results = soup.find('ul', id='movie_results_ul')
    if results is None:
        rating = soup.find(name="span", attrs={"class": "meter popcorn numeric "}).renderContents() + "%"
        title = strip_html(encode(soup.find('h1', 'movie_title').renderContents(), 'utf-8')).strip()
        return title, rating
    else:
        for result in results('li'):
            try:
                title = strip_html(result.find('div', 'media_block_content').h3.a.renderContents()).strip()
                if ourname == self.normalize(title):
                    url = result.h3.a['href']
                    innerSoup = getsoup(self.rt_url + url, {}, self.rt_search, {'search': name})
                    rating = innerSoup.find(name="span", attrs={"class": "meter popcorn numeric "}).renderContents() + "%"
                    return title, rating
            except AttributeError:
                pass
        return

def response(self, nick, args, kwargs):
    page = 1
    players = []
    while page:
        url = self.group_url + "?p=%d" % page
        soup = getsoup(url)
        next = soup.body.find("div", "pageLinks").find(text=self.next_re)
        if next is None:
            page = None
        else:
            page = int(next.parent["href"].split("=", 1)[-1])
        for player in soup.body("div", attrs={"class": self.status_re}):
            name = strip_html(player.p.a.renderContents())
            game = player.find("span", "linkFriend_in-game")
            if game is None:
                if settings.STEAM_SHOW_ONLINE:
                    status = "Online"
                else:
                    status = None
            else:
                status = strip_html(game.renderContents()).split("\n")[-1].replace(" - Join", "")
            if status:
                players.append("%s: %s" % (name, status))
    if players:
        return u"\n".join(players)
    return u"No one online."

def rate_rt(self, name):
    """Rating from rotten tomatoes"""
    soup = getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
    ourname = self.normalize(name)
    results = soup.find('ul', id='movie_results_ul')
    if results is None:
        rating = soup.find('span', id='all-critics-meter').renderContents() + '%'
        title = strip_html(encode(soup.find('h1', 'movie_title').renderContents(), 'utf-8')).strip()
        return title, rating
    else:
        for result in results('li'):
            try:
                rating = strip_html(result.find('span', 'tMeterScore').renderContents()).strip()
                title = strip_html(result.find('div', 'media_block_content').h3.a.renderContents()).strip()
                if ourname == self.normalize(title):
                    return title, rating
            except AttributeError:
                pass

def response(self, nick, args, kwargs):
    kwargs['req'].blockquoted = True
    try:
        user = args[0]
    except:
        user = None
    if user is None or user == u'':
        doc = self.geturl(self.randomURL)
        user = re.search(u'"currentJournal":\s*"(.*?)"', doc).group(1)
    url = urljoin(self.baseURL, u'/users/%s/data/rss' % user)
    rss = feedparser.parse(url)
    entry = strip_html(rss.entries[0].description)[:self.max]
    page = strip_html(rss.channel.link)
    return u'%s: [%s] %s' % (nick, page, entry)

def calculator(self, query):
    """Try to use google calculator for given query"""
    opts = dict(self.calcopts)
    opts[u'q'] = query
    doc = self.ua.open(self.search, opts=opts)
    soup = BeautifulSoup(doc)
    values = []
    conv_left = soup.find('input', id='ucw_lhs_d')
    conv_right = soup.find('input', id='ucw_rhs_d')
    if not (conv_left is None or conv_right is None):
        left_value = conv_left['value'].strip()
        left_unit = conv_left.findNext('option').renderContents().strip()
        right_value = conv_right['value'].strip()
        right_unit = conv_right.findNext('option').renderContents().strip()
        values.append('(%s) %s = (%s) %s' % (left_unit, left_value, right_unit, right_value))
    calculation = soup.find('span', 'cwcot')
    if calculation is not None:
        values.append(calculation.renderContents())
    try:
        values.append(soup.find('h3', 'r').b.renderContents())
    except StandardError:
        pass
    # ipython()
    result = u', '.join(filter(None, (decode(strip_html(value)).strip() for value in values)))
    if result:
        return result

def response(self, nick, args, kwargs):
    soup = getsoup(self.spec_url % int(args[0]) if args[0] else self.rand_url)
    soup.find('div', id='submit').extract()
    post = soup.body.find('div', 'post')
    id = int(post.find('a', 'fmllink')['href'].split('/')[-1])
    body = strip_html(decode(' '.join(link.renderContents() for link in post('a', 'fmllink')), 'utf-8'))
    return u'%s: (%d) %s' % (nick, id, body)

def _getsummary(self, url, opts=None):
    soup, title = self._getpage(url, opts)
    spam = soup.find('div', attrs={'id': 'siteNotice'})
    if spam is not None:
        spam.extract()
    # massage into plain text by concatenating paragraphs
    content = u' '.join(decode(p.renderContents(), 'utf-8') for p in soup.findAll('p'))
    # clean up rendered text
    content = strip_html(content)                    # strip markup
    content = self.citations_re.sub(u'', content)    # remove citations
    content = self.parens_re.sub(u'', content)       # remove parentheticals
    content = self.whitespace_re.sub(u' ', content)  # compress whitespace
    content = self.fix_punc_re.sub(r'\1', content)   # fix punctuation
    content = content.strip()                        # strip whitespace
    # generate summary by adding as many sentences as possible before limit
    summary = u'%s -' % title
    for sentence in self.sentence_re.findall(content):
        if len(summary + sentence) >= self.summary_size:
            break
        summary += ' ' + sentence
    return summary

def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name}, referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        main = soup.body.find('div', id='main')
        name = self.normalize(name)
        url = None
        for p in main('p'):
            if p.b is not None:
                section = p.b.renderContents()
                if section in ('Titles (Exact Matches)', 'Popular Titles', 'Titles (Partial Matches)'):
                    for a in p('a'):
                        text = a.renderContents()
                        if text:
                            normalized = self.normalize(text)
                            if normalized == name:
                                url = urljoin(self.imdb_url, a['href'])
                                break
            if url:
                break
        else:
            raise ValueError('no exact matches')
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating

def getweather(self, location):
    """Look up NOAA weather"""
    soup = getsoup(self.noaa_search, {'inputstring': location}, referer=self.noaa_url)
    # jesus f*****g christ, their html is bad.. looks like 1987
    # nested tables, font tags, and not a single class or id.. good game
    current = soup.find('img', alt='Current Local Weather')
    if not current:
        return u'NOAA website is having issues'
    current = current.findNext('table').table.table
    temp = current.td.font.renderContents().replace('<br />', '|')
    temp = strip_html(temp.decode('utf-8')).replace('\n', '').strip()
    cond, _, tempf, tempc = temp.split('|')
    tempc = tempc.replace('(', '').replace(')', '')
    tempf, tempc = self.docolor(tempf, tempc)
    other = current.table
    items = [u'%s (%s) - %s' % (tempf, tempc, cond)]
    for row in other('tr'):
        if row.a:
            continue
        cells = row('td')
        key = self.render(cells[0])
        val = self.render(cells[1])
        items.append(u'%s %s' % (key, val))
    return u', '.join(items)

def clock(self, query):
    """Use google to look up time in a given location"""
    doc = self.ua.open(self.search, {'q': 'time in %s' % query})
    soup = BeautifulSoup(doc)
    table = soup.find('li', attrs={'class': re.compile('obcontainer')})
    for subtable in table.findAll('table'):
        subtable.extract()
    return re.sub(r'\s{2,}', ' ', strip_html(self.decode(table).strip())).strip()

def response(self, nick, args, kwargs):
    soup = getsoup(self.spec_url % int(args[0]) if args[0] else self.rand_url)
    soup.find('div', id='submit').extract()
    post = soup.body.find('div', 'post')
    return u'%s: (%d) %s' % (
        nick,
        int(post.find('a', 'fmllink')['href'].split('/')[-1]),
        strip_html(' '.join(link.renderContents()
                            for link in post('a', 'fmllink')).decode('utf-8', 'ignore')))

def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name}, referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'IMDb Title Search':
        main = soup.body.find('div', id='main')
        name = self.normalize(name)
        url = None
        for p in main('p'):
            if p.b is not None:
                section = p.b.renderContents()
                if section in ('Titles (Exact Matches)', 'Popular Titles', 'Titles (Partial Matches)'):
                    for a in p('a'):
                        text = a.renderContents()
                        if text:
                            normalized = self.normalize(text)
                            if normalized == name:
                                url = urljoin(self.imdb_url, a['href'])
                                break
            if url:
                break
        else:
            raise ValueError('no exact matches')
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating

def get_title(self, url):
    try:
        html = self.ua.open(url, size=2048)
        title = strip_html(self.title.search(html).group(1))
    except:
        title = url
    return title

def getweather(self, location):
    """Look up NOAA weather"""
    soup = getsoup(self.noaa_search, {'inputstring': location}, referer=self.noaa_url)
    # jesus f*****g christ, their html is bad.. looks like 1987
    # nested tables, font tags, and not a single class or id.. good game
    current = soup.find('img', alt='Current Local Weather')
    if not current:
        return u'NOAA website is having issues'
    current = current.findNext('table').table.table
    temp = current.td.font.renderContents().replace('<br />', '|')
    temp = strip_html(decode(temp, 'utf-8')).replace('\n', '').strip()
    cond, _, tempf, tempc = temp.split('|')
    tempc = tempc.replace('(', '').replace(')', '')
    tempf, tempc = self.docolor(tempf, tempc)
    other = current.table
    items = [u'%s (%s) - %s' % (tempf, tempc, cond)]
    for row in other('tr'):
        if row.a:
            continue
        cells = row('td')
        key = self.render(cells[0])
        val = self.render(cells[1])
        items.append(u'%s %s' % (key, val))
    return u', '.join(items)

def calculator(self, query):
    """Try to use google calculator for given query"""
    opts = dict(self.calcopts)
    opts[u'q'] = query
    doc = self.ua.open(self.search, opts=opts)
    soup = BeautifulSoup(doc)
    values = []
    conv_left = soup.find('input', id='ucw_lhs_d')
    conv_right = soup.find('input', id='ucw_rhs_d')
    if not (conv_left is None or conv_right is None):
        left_value = conv_left['value'].strip()
        left_unit = conv_left.findNext('option').renderContents().strip()
        right_value = conv_right['value'].strip()
        right_unit = conv_right.findNext('option').renderContents().strip()
        values.append('(%s) %s = (%s) %s' % (left_unit, left_value, right_unit, right_value))
    calculation = soup.find('span', 'cwcot')
    if calculation is not None:
        values.append(calculation.renderContents())
    try:
        values.append(soup.find('h3', 'r').b.renderContents())
    except StandardError:
        pass
    # ipython()
    result = u', '.join(filter(None, (decode(strip_html(value)).strip() for value in values)))
    if result:
        return result

def _response(self, *args):
    try:
        status = self.api.GetRateLimitStatus()
        if status['resources']['statuses']['/statuses/home_timeline']['remaining'] < self.soft_limit:
            self.log.warn('twitter rate limit soft threshold exceeded:\n' + pformat(status))
            raise APIError('Hit the Twitter ratelimit, backing off. Reduce the update frequency.')
        tweets = self.api.GetHomeTimeline(since_id=self.last_id)
        if tweets:
            lines = []
            new_last_id = _getid(max(tweets, key=_getid))
            try:
                if self.last_id is not None:
                    for tweet in sorted(tweets, key=_getid):
                        if tweet.id > self.last_id:
                            tweet.text_clean = strip_html(tweet.text)
                            lines.append(self.tweet_format.format(tweet=tweet))
            finally:
                self.last_id = new_last_id
            if lines:
                return u'\n'.join(lines)
    except TwitterError, exc:
        raise APIError.from_twitter_error(exc)

def response(self, nick, args, kwargs):
    source = self.sources[args[0]]
    try:
        query = args[1]
    except:
        query = None
    try:
        num = int(query)
        query = None
    except:
        num = None
    if num:
        url = source.bynum.replace(u'num', unicode(num))
        opts = None
    elif query:
        url = source.search
        opts = dict(source.opts)
        opts[source.query] = query
    else:
        url = source.random
        opts = None
    doc = geturl(url, opts=opts)
    entries = source.entries.findall(doc)
    if query:
        entries = filter(None, entries)
    entry = random.choice(entries)
    return '\n'.join(filter(None, strip_html(entry).strip().splitlines()))

def lookup_verse(self, query, book=None):
    """Lookup specified verse"""
    if book is None:
        book = self.DEFAULT_BIBLE
    elif book not in self.bibles:
        return u"Unknown bible.. why do you hate god so much?"
    opts = {"search": query, "version": book}
    soup = self.getsoup(self.bg_search, opts, referer=self.bg_search)
    passage = soup.find("div", {"class": re.compile("passage-content")})
    for name in "heading passage-class-0", "publisher-info-bottom":
        junk = passage.find("div", name)
        if junk is not None:
            junk.extract()
    response = []
    for para in passage("p"):
        response.append(para.renderContents())
    res = decode(" ".join(response), "utf-8")
    # convert superscript verse markers to unicode
    while True:
        match = self.sup_re.search(res)
        if not match:
            break
        res = res.replace(match.group(0), superscript(match.group(1)))
    # XXX this is like this for a reason
    res = strip_html(res).replace(u"\xa0", u" ")
    while u"  " in res:
        res = res.replace(u"  ", u" ")
    res = res.strip()
    return res

def lookup_verse(self, query, book=None):
    """Lookup specified verse"""
    if book is None:
        book = self.DEFAULT_BIBLE
    elif book not in self.bibles:
        return u'Unknown bible.. why do you hate god so much?'
    opts = {'search': query, 'version': book}
    soup = getsoup(self.bg_search, opts, referer=self.bg_search)
    passage = soup.find('div', 'passage-wrap')
    for name in 'heading passage-class-0', 'publisher-info-bottom':
        passage.find('div', name).extract()
    response = []
    for para in passage('p'):
        response.append(para.renderContents())
    res = decode(' '.join(response), 'utf-8')
    # convert superscript verse markers to unicode
    while True:
        match = self.sup_re.search(res)
        if not match:
            break
        res = res.replace(match.group(0), superscript(match.group(1)))
    # XXX this is like this for a reason
    res = strip_html(res).replace(u'\xa0', u' ')
    while u'  ' in res:
        res = res.replace(u'  ', u' ')
    res = res.strip()
    return res

def sunrise_sunset(self, query, location):
    """Ask google for the sunrise or sunset from location"""
    soup = BeautifulSoup(self.ua.open(self.search, {'q': '%s in %s' % (query, location)}))
    image = soup.find('img', src=self.sun_re)
    row1 = image.findNext('td')
    row2 = row1.findNext('td')
    result = strip_html(u'%s (%s)' % (self.decode(row1), self.decode(row2)))
    return self.whitespace_re.sub(u' ', result.strip())

def response(self, nick, args, kwargs):
    fail = BeautifulSoup(self.geturl(self.url)).h1
    return self.spaces_re.sub(' ', strip_html(
        u'%s: %s: %s %s: %s' % (
            nick,
            self.col('red', text='FAIL'),
            self.fail_re.search(fail.renderContents()).group(1),
            self.col('green', text='FIX'),
            self.fail_re.search(fail.findNext('h1').renderContents()).group(1))))

def response(self, nick, args, kwargs):
    url = urlunparse(('https', 'www.google.com', 'search', '',
                      urlencode({'num': '100', 'safe': 'off', 'hl': 'en',
                                 'q': 'site:songmeanings.com ' + args[0]}), ''))
    soup = getsoup(url)
    new = None
    for h3 in soup.findAll('h3', attrs={'class': 'r'}):
        uri = urlparse(h3.a['href'])
        if uri.path == '/url':
            url = dict(parse_qsl(uri.query))['q']
            uri = urlparse(url)
        if re.search('/songs/view/\d+', uri.path) is not None:
            new = urlunparse(uri._replace(query='', fragment=''))
            break
        elif re.search('/profiles/(submissions|interaction)/\d+/comments', uri.path) is not None:
            soup = getsoup(url)
            for a in soup.find('a', title='Direct link to comment'):
                new = urlunparse(urlparse(a.parent['href'])._replace(fragment='', query=''))
                break
            if new:
                break
    if new:
        url = new
    try:
        soup = getsoup(url)
        try:
            title = re.sub('\s+Lyrics\s+\|\s+SongMeanings.*$', '', soup.title.renderContents())
        except StandardError:
            title = 'Unknown artist/song, check parsing code!'
        text = soup.find('div', attrs={'class': re.compile(r'.*lyric-box.*')})
        for a in text('a'):
            a.extract()
    except StandardError:
        self.log.warn('unable to find textblock from url {0!r} (query: {1!r})'.format(url, args[0]))
        return u'{nick}: {error}'.format(error=self.error, **kwargs)
    try:
        lyrics = decode(text.renderContents(), 'utf-8')
        return u'\n'.join(['[{}]'.format(title)] + filter(
            None, [line.strip() for line in strip_html(lyrics).splitlines()]))
    except StandardError:
        self.log.exception('error parsing lyrics for query: {0!r}'.format(args[0]))
        return u'{nick}: {error}'.format(error=self.error, **kwargs)

def response(self, nick, args, kwargs):
    opts = {'hl': 'en', 'aq': 'f', 'safe': 'off', 'q': args[0]}
    soup = getsoup(self.google_search, opts, referer=self.google_url)
    a = soup.body.find('a', 'spell')
    if a:
        res = strip_html(a.renderContents().decode('utf-8', 'ignore'))
    else:
        res = u'spelled correctly'
    return u'%s: %s' % (nick, res)

def lookup(self, term, idx=1):
    """Lookup term in dictionary"""
    url = urljoin(self.define_url, quote(term.lower()))
    soup = getsoup(url, referer=self.base_url)
    for br in soup('br'):
        br.extract()
    val = strip_html(soup.renderContents().decode('utf-8'))
    val = val.replace(u'\xa0', ' ').replace('\n', ' ')
    return self.whitespace_re.sub(' ', val).strip()

def normalize(name):
    """Normalize city name for easy comparison"""
    name = strip_html(name)
    name = year.sub(u'', name)
    name = badchars.sub(u' ', name)
    name = name.lower()
    name = name.strip()
    name = whitespace.sub(u' ', name)
    return name

def lookup(self, term, idx=1):
    """Lookup term in dictionary"""
    url = urljoin(self.define_url, quote(term.lower()))
    soup = getsoup(url, referer=self.base_url)
    for br in soup('br'):
        br.extract()
    val = strip_html(decode(soup.renderContents(), 'utf-8'))
    val = val.replace(u'\xa0', ' ').replace('\n', ' ')
    return self.whitespace_re.sub(' ', val).strip()

def response(self, nick, args, kwargs):
    opts = {'hl': 'en', 'safe': 'off', 'q': args[0]}
    soup = self.getsoup(self.google_search, opts, referer=self.google_url)
    correct = soup.body.find('a', href=re.compile(r'^/search.*spell=1'))
    if correct:
        res = strip_html(decode(correct.renderContents(), 'utf-8'))
    else:
        res = u'spelled correctly. probably.'
    return u'%s: %s' % (nick, res)

def rate_rt(self, name):
    """Rating from rotten tomatoes"""
    soup = getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
    ourname = self.normalize(name)
    results = soup.find('ul', id='movie_results_ul')
    if results is None:
        rating = soup.find('span', id='all-critics-meter').renderContents() + '%'
        title = strip_html(soup.find('h1', 'movie_title').renderContents().encode('utf-8', 'ignore')).strip()
        return title, rating
    else:
        for result in results('li'):
            try:
                rating = strip_html(result.find('span', 'tMeterScore').renderContents()).strip()
                title = strip_html(result.find('div', 'media_block_content').h3.a.renderContents()).strip()
                if ourname == self.normalize(title):
                    return title, rating
            except AttributeError:
                pass

def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name, 'exact': 'true'}, referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'Find - IMDb':
        url = urljoin(self.imdb_url,
                      soup.body.find('table', 'findList').tr.find('td', 'result_text').a['href'])
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating

def response(self, nick, args, kwargs):
    fail = BeautifulSoup(geturl(self.url)).h1
    return self.spaces_re.sub(' ', strip_html(
        u'%s: %s: %s %s: %s' % (
            nick,
            self.col('red', text='FAIL'),
            self.fail_re.search(fail.renderContents()).group(1),
            self.col('green', text='FIX'),
            self.fail_re.search(fail.findNext('h1').renderContents()).group(1))))

def forecast(self, location):
    '''get weather forecast'''
    try:
        page = geturl(url=self.forecast_url, opts={u'query': location}).encode('utf-8')
        xml = ElementTree.fromstring(page)
        text = strip_html(xml.find('.//fcttext').text)
    except Exception, e:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(e)
        return "error looking up forecast for location: %s" % location

def clock(self, query):
    """Use google to look up time in a given location"""
    try:
        doc = self.ua.open(self.search, {'q': 'time in %s' % query})
        soup = BeautifulSoup(doc)
        table = soup.find('table', 'obcontainer')
        time = table.find('td', style='font-size:medium')
        return strip_html(time.renderContents().decode('utf-8')).strip()
    except:
        raise

def clock(self, query):
    """Use google to look up time in a given location"""
    try:
        doc = self.ua.open(self.search, {'q': 'time in %s' % query})
        soup = BeautifulSoup(doc)
        table = soup.find('div', 'obcontainer')
        time = table.find('td', style='font-size:medium')
        return strip_html(self.decode(time).strip())
    except:
        raise

def forecast(self, location):
    """get weather forecast"""
    try:
        page = geturl(url=self.forecast_url, opts={u"query": location}).encode("utf-8")
        xml = ElementTree.fromstring(page)
        text = strip_html(xml.find(".//fcttext").text)
    except Exception, e:
        self.log.warn(u"error in module %s" % self.__module__)
        self.log.exception(e)
        return "error looking up forecast for location: %s" % location

def extract_quote(self, obj):
    li = obj.find(u'li')
    contents = li.contents
    contents = [unicode(part) for part in contents]
    quote = u' '.join(contents)
    quote = strip_html(quote)
    quote = _linebreak.sub(u' ', quote)
    quote = _whitespace.sub(u' ', quote)
    quote = quote.strip()
    return quote

def sunrise_sunset(self, query, location):
    """Ask google for the sunrise or sunset from location"""
    soup = BeautifulSoup(self.ua.open(self.search, {'q': '%s in %s' % (query, location)}))
    image = soup.find('img', src=self.sun_re)
    row1 = image.findNext('td')
    row2 = row1.findNext('td')
    result = strip_html(u'%s (%s)' % (self.decode(row1), self.decode(row2)))
    return self.whitespace_re.sub(u' ', result.strip())

def translate(self, text, src, dst):
    """Perform the translation"""
    opts = {'langpair': '%s|%s' % (self.langs[src], self.langs[dst]),
            'v': '1.0',
            'q': text}
    res = simplejson.loads(geturl(self.url, opts))['responseData']
    text = strip_html(res['translatedText'])
    try:
        text = u'[detected %s] %s' % (self.lookup[res['detectedSourceLanguage']].capitalize(), text)
    except KeyError:
        pass
    return text

def extract_quote(self, obj):
    li = obj.find(u'li')
    contents = li.contents
    contents = [unicode(part) for part in contents]
    quote = u' '.join(contents)
    quote = strip_html(quote)
    quote = self._linebreak.sub(u' ', quote)
    quote = self._whitespace.sub(u' ', quote)
    quote = quote.strip()
    return quote

def response(self, nick, args, kwargs):
    doc = geturl(self.random, add_headers={'Accept': '*/*'})
    soup = BeautifulSoup(doc)
    main = soup.find(u'div', attrs={u'id': u'main'})
    confs = main.findAll(u'div', attrs={u'class': u'content'})
    conf = random.choice(confs)
    conf = [unicode(p) for p in conf.findAll(u'p')]
    conf = u' '.join(conf)
    conf = strip_html(conf)
    conf = conf.strip()
    return conf

def normalize(self, name):
    """Normalize a movie title for easy comparison"""
    name = strip_html(name)
    name = self.year_re.sub('', name)               # strip trailing year
    name = self.rev_article_re.sub(r'\2 \1', name)  # Movie, The = The Movie
    name = self.articles_re.sub('', name)           # strip leading the/an
    name = self.badchars_re.sub(' ', name)          # only allow alnum
    name = name.lower()                             # lowercase only
    name = name.strip()                             # strip whitespace
    name = self.whitespace_re.sub(' ', name)        # compress whitespace
    return name

def response(self, nick, args, kwargs):
    kwargs["req"].quoted = True
    soup = self.getsoup(url)
    posts = soup.body("div", "content")
    contents = []
    for post in posts:
        a = post.find("a", href=re.compile(r"Text-Replies"))
        if a is not None:
            content = u" ".join(strip_html(decode(a.renderContents())).strip().splitlines())
            contents.append(content)
    return random.choice(contents)

def rate_rt_audience(self, name):
    """Audience Rating from rotten tomatoes"""
    soup = self.getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
    ourname = self.normalize(name)
    results = soup.find('ul', id='movie_results_ul')
    if results is None:
        rating = soup.find(name="span", attrs={"class": "meter popcorn numeric "}).renderContents() + "%"
        title = strip_html(encode(soup.find('h1', 'movie_title').renderContents(), 'utf-8')).strip()
        return title, rating
    else:
        for result in results('li'):
            try:
                title = strip_html(result.find('div', 'media_block_content').h3.a.renderContents()).strip()
                if ourname == self.normalize(title):
                    url = result.h3.a['href']
                    innerSoup = self.getsoup(self.rt_url + url, {}, self.rt_search, {'search': name})
                    rating = innerSoup.find(name="span", attrs={"class": "meter popcorn numeric "}).renderContents() + "%"
                    return title, rating
            except AttributeError:
                pass
        return

def normalize(self, name):
    """Normalize a movie title for easy comparison"""
    name = strip_html(name)
    name = self.year_re.sub('', name)               # strip trailing year
    name = self.rev_article_re.sub(r'\2 \1', name)  # Movie, The = The Movie
    name = self.articles_re.sub('', name)           # strip leading the/an
    name = self.badchars_re.sub(' ', name)          # only allow alnum
    name = name.lower()                             # lowercase only
    name = name.strip()                             # strip whitespace
    name = self.whitespace_re.sub(' ', name)        # compress whitespace
    name = self.and_re.sub(' ', name)               # the word "and"
    return name

def response(self, nick, args, kwargs):
    try:
        query = args[0]
        doc = self.geturl(self.search, opts={u'verbose': u'on', u'name': query})
        drink = self.drink.search(doc).group(1)
        url = urljoin(self.baseurl, drink)
        doc = self.geturl(url)
        title = self.title.search(doc).group(1)
        ingredients = self.ingredients.findall(doc)
        instructions = self.instructions.search(doc).group(1)
        response = strip_html(u'%s - %s - %s' % (title, u', '.join(ingredients), instructions))
    except Exception, error:
        response = u"That's a made-up drink, sorry."

def bodycount(self):
    try:
        doc = geturl(self._bodycount_url)
        data = self._bodycount_re.search(doc).group(1)
        data = decode(data, 'ascii')
        data = strip_html(data)
        data = self._re_whitespace.sub(u' ', data)
        data = data.strip()
        return data
    except Exception, error:
        self.log.warn(u'error in module %s' % self.__module__)
        self.log.exception(error)
        return u'UNKNOWN'

def on_message(self, user, message, private, addressed, chat=None):
    """Process incoming messages and dispatch to main bot"""
    if user.name == self.bot.botname():
        return
    message = strip_html(message)
    req = Request(message=message)
    req.nick = user.name
    req.channel = u'AIM'
    req.aim = self
    req.private = private
    req.addressed = addressed
    req.chat = chat
    self.bot.log.info(u'[AIM] <%s> %s' % (req.nick, req.message))
    self.bot.check_addressing(req)
    self.bot.process_message(req)

def response(self, nick, args, kwargs):
    try:
        url = args[0]
        uri = urlparse(url)
        if (uri.scheme.lower() in SCHEMES and
                '.'.join(uri.netloc.lower().split('.')[-2:]) in DOMAINS and
                os.path.split(os.path.normpath(uri.path))[-1] == 'watch' and
                'v' in cgi.parse_qs(uri.query)):
            soup = getsoup(url)
            title = strip_html(decode(soup.title.renderContents())).replace(u' - YouTube', u'').strip()
            if title:
                self.bot.output(title, kwargs['req'])
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        pass

def get_summary(self, query):
    soup, title = self.get_soup(query)
    # check if this is a disambiguation page, if so construct special page
    # there isn't a consistent style guide, so we just try to do the
    # most common format (ordered list of links). if this fails, return
    # a friendly failure for now
    if soup.find(u'div', attrs={u'id': u'disambig'}):
        try:
            summary = u'%s (Disambiguation) - ' % title
            for link in soup.find(u'ul').findAll(u'a'):
                title = unicode(link[u'title']).strip()
                if len(summary) + len(title) + 2 > self.summary_size:
                    break
                if not summary.endswith(u' '):
                    summary += u', '
                summary += title
        except:
            summary = u'Fancy, unsupported disambiguation page!'
        return summary
    # massage into plain text by concatenating paragraphs
    content = []
    for para in soup.findAll(u'p'):
        content.append(unicode(para))
    content = u' '.join(content)
    # clean up rendered text
    content = strip_html(content)                  # strip markup
    content = Wiki._citations.sub(u'', content)    # remove citations
    content = Wiki._parens.sub(u'', content)       # remove parentheticals
    content = Wiki._whitespace.sub(u' ', content)  # compress whitespace
    content = Wiki._fix_punc.sub(r'\1', content)   # fix punctuation
    content = content.strip()                      # strip whitespace
    # search error
    if title == self.error:
        return u'No results found for "%s"' % query
    # generate summary by adding as many sentences as possible before limit
    summary = u'%s -' % title
    for sentence in Wiki._sentence.findall(content):
        if len(summary) + 1 + len(sentence) > self.summary_size:
            break
        summary += u' %s' % sentence
    return summary

def response(self, nick, args, kwargs):
    query = args[0]
    if query is None or query == u'':
        url = self.random_url
    else:
        query = u' '.join(query.split())
        query = query.replace(u' ', u'_')
        query = encode(query, 'utf-8')
        query = urllib.quote(query) + u'.php'
        url = urljoin(self.baseurl, query)
    doc = geturl(url)
    result = self.joke.findall(doc)[0]
    result = strip_html(result)
    # cleanup output a bit.. some funny whitespace in it -cj
    result = result.replace(u'\x14', u' ')
    result = result.replace(u'\n', u' ')
    result = re.sub(r'\s{2,}', u' ', result)
    return result.strip()

def response(self, nick, args, kwargs):
    query = args[0]
    check = self.clean.sub(u'', query)
    check = re.compile(re.escape(check), re.I)
    results = []
    page = getsoup(self.url)
    table = page.find('table', id='gvIncidents')
    rows = table('tr')[1:]
    for row in rows:
        _, num, time, type, loc, coord, area = [strip_html(cell.renderContents())
                                                for cell in row('td')]
        if check.search(loc):
            results.append(u'=> %s: %s (%s) %s' % (time, loc, area, type))
    if len(results) > 0:
        return u'\n'.join(results)
    else:
        return u'%s: No incidents found' % nick

def get_quote(self, symbols):
    """Looks up the symbol from finance.yahoo.com, returns formatted result"""
    symbols = [quote(symbol) for symbol in symbols.split()]
    url = Yahoo._quote_url.replace(u'SYMBOL', "+".join(symbols))
    page = geturl(url)
    results = []
    for line in page.splitlines():
        data = csv.reader([line]).next()
        symbol = data[0]
        name = data[1]
        trade_time, last_trade = strip_html(data[3]).split(" - ")
        last_trade = locale.atof(last_trade)
        try:
            last_close = locale.atof(data[2])
            exchange = False
        except ValueError:
            last_close = last_trade
            exchange = True
        if trade_time == "N/A":
            trade_time = u'market close'
        if exchange:
            results.append(u'%s (%s) - %s: %.4f' % (name, symbol, trade_time, last_trade))
        else:
            delta = last_trade - last_close
            try:
                delta_perc = delta * 100.0 / last_close
            except ZeroDivisionError:
                delta_perc = 0.00
            if delta < 0:
                color = u'red'
            elif delta > 0:
                color = u'green'
            else:
                color = u'white'
            text = self.colorlib.get_color(
                color, text=u'%.2f (%+.2f %+.2f%%)' % (last_trade, delta, delta_perc))
            results.append(u'%s (%s) - Open: %.2f | %s: %s' % (name, symbol, last_close, trade_time, text))
    return u'\n'.join(results)

def rate_imdb(self, name):
    """Get user rating from IMDB"""
    page = geturl(self.imdb_search, {'s': 'tt', 'q': name, 'exact': 'true'}, referer=self.imdb_url)
    soup = BeautifulSoup(page)
    if soup.title.renderContents() == 'Find - IMDb':
        url = urljoin(self.imdb_url,
                      soup.body.find('table', 'findList').tr.find('td', 'result_text').a['href'])
        soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
    rating = soup.find('span', itemprop='ratingValue').renderContents()
    realname = strip_html(soup.title.renderContents().replace(' - IMDb', ''))
    return realname, rating