Exemple #1
0
    def response(self, nick, args, kwargs):
        """Fetch song lyrics from songmeanings.net via a Google lucky search.

        ``args[0]`` is the search query.  On success the reply is the
        bracketed page title followed by every non-blank lyric line, joined
        with newlines.  When the lucky search yields no redirect, the page
        cannot be fetched, or the lyrics cannot be parsed, the reply is
        ``self.error`` formatted together with ``kwargs`` (which must supply
        ``nick``).
        """
        query = args[0]

        def failure():
            # Built lazily: kwargs is only consulted on the error paths.
            return u'{nick}: {error}'.format(error=self.error, **kwargs)

        try:
            url = self.google.lucky(u'site:songmeanings.net ' + query)
        except NonRedirectResponse:
            self.log.warn(
                'no url for query {0!r} found from google lucky'.format(
                    query))
            return failure()

        try:
            soup = getsoup(url)
            try:
                anchor = soup.find('a', 'pw_title')
                title = strip_html(anchor.renderContents()).strip()
            except StandardError:
                # A missing title is not fatal; use a placeholder instead.
                title = 'Unknown artist/song, check parsing code!'
            text = soup.find('div', id='textblock')
        except StandardError:
            self.log.warn(
                'unable to find textblock from url {0!r} (query: {1!r})'.
                format(url, query))
            return failure()

        try:
            lyrics = decode(text.renderContents(), 'utf-8')
            header = ['[{}]'.format(title)]
            stripped = [line.strip() for line in strip_html(lyrics).splitlines()]
            return u'\n'.join(header + [line for line in stripped if line])
        except StandardError:
            self.log.exception('error parsing lyrics for query: {0!r}'.format(
                query))
            return failure()
Exemple #2
0
 def response(self, nick, args, kwargs):
     """Report which group members are currently online or in a game.

     Fetches ``self.group_url`` page by page, starting at page 1 and
     following the link whose text matches ``self.next_re`` until no such
     link remains.  Every ``div`` whose class matches ``self.status_re``
     is treated as one player entry.

     Returns one ``name: status`` line per listed player, joined with
     newlines, or ``u'No one online.'`` when nobody qualifies.  Players
     without an in-game span are listed as ``Online`` only when
     ``settings.STEAM_SHOW_ONLINE`` is true.
     """
     page = 1
     players = []
     while page:
         soup = getsoup(self.group_url + '?p=%d' % page)

         # The pagination block may be absent; treat that as the last page
         # instead of failing with AttributeError on None.
         page_links = soup.body.find('div', 'pageLinks')
         next_link = None
         if page_links is not None:
             next_link = page_links.find(text=self.next_re)
         if next_link is None:
             page = None
         else:
             # The page number is whatever follows the first '=' in the href.
             page = int(next_link.parent['href'].split('=', 1)[-1])

         for player in soup.body('div', attrs={'class': self.status_re}):
             name = strip_html(player.p.a.renderContents())
             game = player.find('span', 'linkFriend_in-game')
             if game is not None:
                 # Keep only the last line and drop the trailing join link.
                 status = strip_html(
                     game.renderContents()).split('\n')[-1].replace(
                         ' - Join', '')
             elif settings.STEAM_SHOW_ONLINE:
                 status = 'Online'
             else:
                 status = None
             if status:
                 players.append('%s: %s' % (name, status))
     if players:
         return u'\n'.join(players)
     return u'No one online.'
Exemple #3
0
 def rate_rt_audience(self, name):
     """Audience rating from Rotten Tomatoes.

     Searches ``self.rt_search`` for *name*.  When the response has no
     ``movie_results_ul`` list, the page itself is read as the movie page.
     Otherwise each result is compared by normalized title and the first
     match's own page is fetched for the rating.

     Returns ``(title, rating)`` with ``'%'`` appended to the rating, or
     ``None`` when no listed result matches.
     """
     soup = getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
     ourname = self.normalize(name)
     results = soup.find('ul', id='movie_results_ul')

     if results is not None:
         for result in results('li'):
             try:
                 block = result.find('div', 'media_block_content')
                 title = strip_html(block.h3.a.renderContents()).strip()
                 if ourname != self.normalize(title):
                     continue
                 url = result.h3.a['href']
                 innerSoup = getsoup(self.rt_url + url, {},
                                     self.rt_search, {'search': name})
                 meter = innerSoup.find(
                     name="span", attrs={"class": "meter popcorn numeric "})
                 rating = meter.renderContents() + "%"
                 return title, rating
             except AttributeError:
                 # Result entry lacks the expected markup; try the next one.
                 pass
         return None

     # No result list: the search response is read as the movie page.
     meter = soup.find(name="span",
                       attrs={"class": "meter popcorn numeric "})
     rating = meter.renderContents() + "%"
     heading = soup.find('h1', 'movie_title').renderContents()
     title = strip_html(encode(heading, 'utf-8')).strip()
     return title, rating
Exemple #4
0
 def response(self, nick, args, kwargs):
     """Report which group members are currently online or in a game.

     Fetches ``self.group_url`` page by page, starting at page 1 and
     following the link whose text matches ``self.next_re`` until no such
     link remains.  Every ``div`` whose class matches ``self.status_re``
     is treated as one player entry.

     Returns one ``name: status`` line per listed player, joined with
     newlines, or ``u"No one online."`` when nobody qualifies.  Players
     without an in-game span are listed as ``Online`` only when
     ``settings.STEAM_SHOW_ONLINE`` is true.
     """
     page = 1
     players = []
     while page:
         soup = getsoup(self.group_url + "?p=%d" % page)

         # The pagination block may be absent; treat that as the last page
         # instead of failing with AttributeError on None.
         page_links = soup.body.find("div", "pageLinks")
         next_link = None
         if page_links is not None:
             next_link = page_links.find(text=self.next_re)
         if next_link is None:
             page = None
         else:
             # The page number is whatever follows the first "=" in the href.
             page = int(next_link.parent["href"].split("=", 1)[-1])

         for player in soup.body("div", attrs={"class": self.status_re}):
             name = strip_html(player.p.a.renderContents())
             game = player.find("span", "linkFriend_in-game")
             if game is not None:
                 # Keep only the last line and drop the trailing join link.
                 status = strip_html(game.renderContents()).split("\n")[-1].replace(" - Join", "")
             elif settings.STEAM_SHOW_ONLINE:
                 status = "Online"
             else:
                 status = None
             if status:
                 players.append("%s: %s" % (name, status))
     if players:
         return u"\n".join(players)
     return u"No one online."
Exemple #5
0
 def rate_rt(self, name):
     """Critics rating from Rotten Tomatoes.

     Searches ``self.rt_search`` for *name*.  When the response has no
     ``movie_results_ul`` list, the page itself is read as the movie page.
     Otherwise the first result whose normalized title equals the
     normalized *name* is used.

     Returns ``(title, rating)``, or ``None`` when no listed result matches.
     """
     soup = getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
     ourname = self.normalize(name)
     results = soup.find('ul', id='movie_results_ul')

     if results is not None:
         for result in results('li'):
             try:
                 score = result.find('span', 'tMeterScore')
                 rating = strip_html(score.renderContents()).strip()
                 heading = result.find('div', 'media_block_content').h3.a
                 title = strip_html(heading.renderContents()).strip()
                 if ourname == self.normalize(title):
                     return title, rating
             except AttributeError:
                 # Result entry lacks the expected markup; try the next one.
                 pass
         return None

     # No result list: the search response is read as the movie page.
     meter = soup.find('span', id='all-critics-meter')
     rating = meter.renderContents() + '%'
     heading = soup.find('h1', 'movie_title').renderContents()
     title = strip_html(encode(heading, 'utf-8')).strip()
     return title, rating
Exemple #6
0
 def response(self, nick, args, kwargs):
     """Quote the newest RSS entry of a user's journal.

     ``args[0]`` names the user.  When it is missing or empty, the page at
     ``self.randomURL`` is fetched and the user is taken from its
     ``"currentJournal"`` value.  The request in ``kwargs['req']`` is
     flagged as blockquoted.

     Returns ``nick: [channel link] entry`` with the entry description
     stripped of HTML and truncated to ``self.max`` characters.
     """
     kwargs['req'].blockquoted = True

     # Only a missing argument falls back to a random journal; other errors
     # (including KeyboardInterrupt) are no longer swallowed.
     try:
         user = args[0]
     except (IndexError, TypeError):
         user = None

     if user is None or user == u'':
         doc = self.geturl(self.randomURL)
         # Backslash escaped explicitly: same pattern text, no reliance on
         # an unrecognised string escape.
         user = re.search(u'"currentJournal":\\s*"(.*?)"', doc).group(1)

     url = urljoin(self.baseURL, u'/users/%s/data/rss' % user)
     rss = feedparser.parse(url)
     entry = strip_html(rss.entries[0].description)[:self.max]
     page = strip_html(rss.channel.link)
     return u'%s: [%s] %s' % (nick, page, entry)
Exemple #7
0
 def calculator(self, query):
     """Try to answer *query* with the Google calculator.

     Collects up to three fragments from the result page: the unit
     conversion widget (inputs ``ucw_lhs_d`` / ``ucw_rhs_d``), the ``cwcot``
     span and the bold text of the first ``h3.r`` heading.  Each fragment is
     stripped of HTML, decoded and trimmed.

     Returns the non-empty fragments joined with ``', '``, or ``None`` when
     nothing usable was found.
     """
     opts = dict(self.calcopts)
     opts[u'q'] = query
     soup = BeautifulSoup(self.ua.open(self.search, opts=opts))
     values = []

     # Unit conversion widget: both inputs must be present.
     conv_left = soup.find('input', id='ucw_lhs_d')
     conv_right = soup.find('input', id='ucw_rhs_d')
     if conv_left is not None and conv_right is not None:
         left_value = conv_left['value'].strip()
         left_unit = conv_left.findNext('option').renderContents().strip()
         right_value = conv_right['value'].strip()
         right_unit = conv_right.findNext('option').renderContents().strip()
         values.append('(%s) %s = (%s) %s' %
                       (left_unit, left_value, right_unit, right_value))

     calculation = soup.find('span', 'cwcot')
     if calculation is not None:
         values.append(calculation.renderContents())

     # The heading is optional; any failure to reach it is ignored.
     try:
         values.append(soup.find('h3', 'r').b.renderContents())
     except StandardError:
         pass

     cleaned = [decode(strip_html(value)).strip() for value in values]
     result = u', '.join([part for part in cleaned if part])
     return result or None
Exemple #8
0
 def response(self, nick, args, kwargs):
     """Return one post as ``nick: (id) body``.

     A truthy ``args[0]`` is converted to ``int`` and substituted into
     ``self.spec_url``; otherwise ``self.rand_url`` is fetched.  The post id
     is the last path segment of the first ``fmllink`` anchor's href, and
     the body is the text of all ``fmllink`` anchors joined with spaces.
     """
     if args[0]:
         url = self.spec_url % int(args[0])
     else:
         url = self.rand_url
     soup = getsoup(url)

     # Drop the submission form when present; its absence is not an error.
     submit = soup.find('div', id='submit')
     if submit is not None:
         submit.extract()

     post = soup.body.find('div', 'post')
     # Named post_id so the builtin id() is not shadowed.
     post_id = int(post.find('a', 'fmllink')['href'].split('/')[-1])
     raw = ' '.join(link.renderContents() for link in post('a', 'fmllink'))
     body = strip_html(decode(raw, 'utf-8'))
     return u'%s: (%d) %s' % (nick, post_id, body)
Exemple #9
0
    def _getsummary(self, url, opts=None):
        """Build a short plain-text summary of the page at *url*.

        The page and its title come from ``self._getpage(url, opts)``.  All
        paragraph text is concatenated, stripped of markup and cleaned with
        the instance's regular expressions.  Sentences are then appended to
        ``"<title> -"`` until adding the next one would reach
        ``self.summary_size`` characters.
        """
        soup, title = self._getpage(url, opts)

        # Remove the site notice banner before collecting paragraphs.
        notice = soup.find('div', attrs={'id': 'siteNotice'})
        if notice is not None:
            notice.extract()

        # Massage into plain text by concatenating paragraphs.
        paragraphs = [decode(p.renderContents(), 'utf-8')
                      for p in soup.findAll('p')]
        content = strip_html(u' '.join(paragraphs))

        # Cleanup passes, applied in this exact order.
        cleanup = (
            (self.citations_re, u''),    # remove citations
            (self.parens_re, u''),       # remove parentheticals
            (self.whitespace_re, u' '),  # compress whitespace
            (self.fix_punc_re, r'\1'),   # fix punctuation
        )
        for pattern, replacement in cleanup:
            content = pattern.sub(replacement, content)
        content = content.strip()

        # Add as many sentences as fit below the size limit.
        summary = u'%s -' % title
        for sentence in self.sentence_re.findall(content):
            if len(summary + sentence) >= self.summary_size:
                break
            summary = summary + ' ' + sentence
        return summary
Exemple #10
0
 def rate_imdb(self, name):
     """Get the user rating for *name* from IMDb.

     Queries ``self.imdb_search``.  When the response is the title search
     page, the exact/popular/partial match sections are scanned for a link
     whose normalized text equals the normalized *name*, and that page is
     fetched.  Otherwise the response is read as the title page itself.

     Returns ``(realname, rating)``.  Raises ``ValueError`` when the search
     page holds no matching link.
     """
     sections = ('Titles (Exact Matches)', 'Popular Titles',
                 'Titles (Partial Matches)')
     page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                   referer=self.imdb_url)
     soup = BeautifulSoup(page)

     if soup.title.renderContents() == 'IMDb Title Search':
         main = soup.body.find('div', id='main')
         name = self.normalize(name)
         url = None
         for p in main('p'):
             if p.b is None:
                 continue
             if p.b.renderContents() not in sections:
                 continue
             for a in p('a'):
                 text = a.renderContents()
                 if text and self.normalize(text) == name:
                     url = urljoin(self.imdb_url, a['href'])
                     break
             if url:
                 break
         else:
             # The loop ended without a break: nothing matched.
             raise ValueError('no exact matches')
         soup = BeautifulSoup(geturl(url, referer=self.imdb_search))

     rating = soup.find('span', itemprop='ratingValue').renderContents()
     page_title = soup.title.renderContents()
     realname = strip_html(page_title.replace(' - IMDb', ''))
     return realname, rating
Exemple #11
0
    def getweather(self, location):
        """Look up NOAA weather for *location*.

        Returns a comma-separated summary that starts with
        ``"<tempf> (<tempc>) - <condition>"`` followed by one ``key value``
        item per detail row, or a fixed notice when the expected marker
        image is missing from the page.
        """
        soup = getsoup(self.noaa_search, {'inputstring': location},
                       referer=self.noaa_url)

        # The page is nested tables with no classes or ids, so navigation
        # starts from the "Current Local Weather" image.
        marker = soup.find('img', alt='Current Local Weather')
        if not marker:
            return u'NOAA website is having issues'
        current = marker.findNext('table').table.table

        # Line breaks separate the fields, so turn them into '|' first.
        raw = current.td.font.renderContents().replace('<br />', '|')
        temp = strip_html(raw.decode('utf-8')).replace('\n', '').strip()
        cond, _, tempf, tempc = temp.split('|')
        tempc = tempc.replace('(', '').replace(')', '')
        tempf, tempc = self.docolor(tempf, tempc)

        details = current.table
        items = [u'%s (%s) - %s' % (tempf, tempc, cond)]
        for row in details('tr'):
            if not row.a:
                # Rows containing a link are skipped.
                cells = row('td')
                label = self.render(cells[0])
                value = self.render(cells[1])
                items.append(u'%s %s' % (label, value))
        return u', '.join(items)
Exemple #12
0
 def clock(self, query):
     """Use Google to look up the time in a given location.

     Searches for ``time in <query>``, takes the ``li`` whose class matches
     ``obcontainer``, removes its nested tables and returns the remaining
     text with whitespace runs collapsed to single spaces.
     """
     soup = BeautifulSoup(
         self.ua.open(self.search, {'q': 'time in %s' % query}))
     container = soup.find('li', attrs={'class': re.compile('obcontainer')})
     for subtable in container.findAll('table'):
         subtable.extract()
     text = strip_html(self.decode(container).strip())
     return re.sub(r'\s{2,}', ' ', text).strip()
 def response(self, nick, args, kwargs):
     """Return one post as ``nick: (id) body``.

     A truthy ``args[0]`` is converted to ``int`` and substituted into
     ``self.spec_url``; otherwise ``self.rand_url`` is fetched.  The post id
     is the last path segment of the first ``fmllink`` anchor's href, and
     the body is the text of all ``fmllink`` anchors joined with spaces,
     decoded as UTF-8 with undecodable bytes ignored.
     """
     if args[0]:
         url = self.spec_url % int(args[0])
     else:
         url = self.rand_url
     soup = getsoup(url)

     # Drop the submission form when present; its absence is not an error.
     submit = soup.find('div', id='submit')
     if submit is not None:
         submit.extract()

     post = soup.body.find('div', 'post')
     post_id = int(post.find('a', 'fmllink')['href'].split('/')[-1])
     raw = ' '.join(link.renderContents() for link in post('a', 'fmllink'))
     body = strip_html(raw.decode('utf-8', 'ignore'))
     return u'%s: (%d) %s' % (nick, post_id, body)
Exemple #14
0
 def rate_imdb(self, name):
     """Get the user rating for *name* from IMDb.

     Queries ``self.imdb_search``.  When the response is the title search
     page, the exact/popular/partial match sections are scanned for a link
     whose normalized text equals the normalized *name*, and that page is
     fetched.  Otherwise the response is read as the title page itself.

     Returns ``(realname, rating)``.  Raises ``ValueError`` when the search
     page holds no matching link.
     """
     wanted_sections = ('Titles (Exact Matches)', 'Popular Titles', 'Titles (Partial Matches)')
     search_opts = {'s': 'tt', 'q': name}
     soup = BeautifulSoup(geturl(self.imdb_search, search_opts, referer=self.imdb_url))

     if soup.title.renderContents() == 'IMDb Title Search':
         main = soup.body.find('div', id='main')
         name = self.normalize(name)
         url = None
         for p in main('p'):
             if p.b is None:
                 continue
             if p.b.renderContents() not in wanted_sections:
                 continue
             for a in p('a'):
                 text = a.renderContents()
                 if text and self.normalize(text) == name:
                     url = urljoin(self.imdb_url, a['href'])
                     break
             if url:
                 break
         else:
             # The loop ended without a break: nothing matched.
             raise ValueError('no exact matches')
         soup = BeautifulSoup(geturl(url, referer=self.imdb_search))

     rating = soup.find('span', itemprop='ratingValue').renderContents()
     page_title = soup.title.renderContents()
     realname = strip_html(page_title.replace(' - IMDb', ''))
     return realname, rating
 def get_title(self, url):
     """Return the title of the page at *url*, or *url* itself on failure.

     Reads up to 2048 bytes through ``self.ua.open`` and extracts the first
     group matched by ``self.title``.  This is a best-effort lookup: any
     ordinary error falls back to the URL.  KeyboardInterrupt and
     SystemExit are deliberately left to propagate.
     """
     try:
         html = self.ua.open(url, size=2048)
         title = strip_html(self.title.search(html).group(1))
     except Exception:
         title = url
     return title
Exemple #16
0
    def getweather(self, location):
        """Look up NOAA weather for *location*.

        Returns a comma-separated summary that starts with
        ``"<tempf> (<tempc>) - <condition>"`` followed by one ``key value``
        item per detail row, or a fixed notice when the expected marker
        image is missing from the page.
        """
        soup = getsoup(self.noaa_search, {'inputstring': location},
                       referer=self.noaa_url)

        # The page is nested tables with no classes or ids, so navigation
        # starts from the "Current Local Weather" image.
        marker = soup.find('img', alt='Current Local Weather')
        if not marker:
            return u'NOAA website is having issues'
        current = marker.findNext('table').table.table

        # Line breaks separate the fields, so turn them into '|' first.
        raw = current.td.font.renderContents().replace('<br />', '|')
        temp = strip_html(decode(raw, 'utf-8')).replace('\n', '').strip()
        cond, _, tempf, tempc = temp.split('|')
        tempc = tempc.replace('(', '').replace(')', '')
        tempf, tempc = self.docolor(tempf, tempc)

        details = current.table
        items = [u'%s (%s) - %s' % (tempf, tempc, cond)]
        for row in details('tr'):
            if not row.a:
                # Rows containing a link are skipped.
                cells = row('td')
                label = self.render(cells[0])
                value = self.render(cells[1])
                items.append(u'%s %s' % (label, value))
        return u', '.join(items)
Exemple #17
0
 def calculator(self, query):
     """Try to answer *query* with the Google calculator.

     Collects up to three fragments from the result page: the unit
     conversion widget (inputs ``ucw_lhs_d`` / ``ucw_rhs_d``), the ``cwcot``
     span and the bold text of the first ``h3.r`` heading.  Each fragment is
     stripped of HTML, decoded and trimmed.

     Returns the non-empty fragments joined with ``', '``, or ``None`` when
     nothing usable was found.
     """
     opts = dict(self.calcopts)
     opts[u'q'] = query
     soup = BeautifulSoup(self.ua.open(self.search, opts=opts))
     fragments = []

     # Unit conversion widget: both inputs must be present.
     conv_left = soup.find('input', id='ucw_lhs_d')
     conv_right = soup.find('input', id='ucw_rhs_d')
     if conv_left is not None and conv_right is not None:
         left_value = conv_left['value'].strip()
         left_unit = conv_left.findNext('option').renderContents().strip()
         right_value = conv_right['value'].strip()
         right_unit = conv_right.findNext('option').renderContents().strip()
         fragments.append('(%s) %s = (%s) %s' % (left_unit, left_value, right_unit, right_value))

     calculation = soup.find('span', 'cwcot')
     if calculation is not None:
         fragments.append(calculation.renderContents())

     # The heading is optional; any failure to reach it is ignored.
     try:
         fragments.append(soup.find('h3', 'r').b.renderContents())
     except StandardError:
         pass

     cleaned = [decode(strip_html(fragment)).strip() for fragment in fragments]
     result = u', '.join([part for part in cleaned if part])
     return result or None
Exemple #18
0
    def _response(self, *args):
        """Return new home-timeline tweets formatted one per line.

        The rate-limit status is checked first; when the remaining
        home-timeline quota is below ``self.soft_limit`` the status is
        logged and ``APIError`` is raised.  Tweets newer than
        ``self.last_id`` are formatted with ``self.tweet_format`` in
        ascending id order.  While ``self.last_id`` is still ``None``
        nothing is emitted and only the high-water mark is recorded.

        Returns the joined lines, or ``None`` when there is nothing new.
        Raises ``APIError`` on rate limiting or on any ``TwitterError``.
        """
        try:
            status = self.api.GetRateLimitStatus()
            timeline = status['resources']['statuses'][
                '/statuses/home_timeline']
            if timeline['remaining'] < self.soft_limit:
                self.log.warn(
                    'twittter rate limit soft threshold exceeded:\n' +
                    pformat(status))
                raise APIError(
                    'Hit the Twitter ratelimit, backing off. Reduce the update frequency.'
                )

            tweets = self.api.GetHomeTimeline(since_id=self.last_id)
            if tweets:
                lines = []
                new_last_id = _getid(max(tweets, key=_getid))
                try:
                    if self.last_id is not None:
                        for tweet in sorted(tweets, key=_getid):
                            if tweet.id > self.last_id:
                                tweet.text_clean = strip_html(tweet.text)
                                lines.append(
                                    self.tweet_format.format(tweet=tweet))
                finally:
                    # Advance the marker even if formatting failed, so the
                    # same tweets are not processed again.
                    self.last_id = new_last_id
                if lines:
                    return u'\n'.join(lines)
        except TwitterError as exc:
            # "as" form is valid on Python 2.6+ and Python 3.
            raise APIError.from_twitter_error(exc)
Exemple #19
0
 def response(self, nick, args, kwargs):
     """Return one entry from the source named by ``args[0]``.

     The optional ``args[1]`` selects the lookup mode: a non-zero integer
     fetches that entry through ``source.bynum``, any other non-empty text
     is sent as a search through ``source.search``, and nothing (or zero)
     fetches ``source.random``.  One entry matched by ``source.entries`` is
     picked at random and returned with HTML and blank lines removed.
     """
     source = self.sources[args[0]]

     # Only a missing second argument is tolerated here.
     try:
         query = args[1]
     except LookupError:
         query = None

     # A query that parses as an integer is an entry number, not a search.
     try:
         num = int(query)
         query = None
     except (TypeError, ValueError):
         num = None

     if num:
         url = source.bynum.replace(u'num', unicode(num))
         opts = None
     elif query:
         url = source.search
         opts = dict(source.opts)
         opts[source.query] = query
     else:
         url = source.random
         opts = None

     doc = geturl(url, opts=opts)
     entries = source.entries.findall(doc)
     if query:
         # Kept as a real list: random.choice needs a sequence.
         entries = [entry for entry in entries if entry]
     entry = random.choice(entries)
     lines = strip_html(entry).strip().splitlines()
     return '\n'.join([line for line in lines if line])
Exemple #20
0
    def lookup_verse(self, query, book=None):
        """Look up the verse(s) named by *query* in the given bible version.

        *book* defaults to ``self.DEFAULT_BIBLE``; a version missing from
        ``self.bibles`` yields a fixed complaint string.  The passage
        paragraphs are joined, verse markers matched by ``self.sup_re`` are
        replaced through ``superscript``, markup is stripped and spacing is
        normalized.
        """
        if book is None:
            book = self.DEFAULT_BIBLE
        elif book not in self.bibles:
            return u"Unknown bible.. why do you hate god so much?"

        soup = self.getsoup(self.bg_search,
                            {"search": query, "version": book},
                            referer=self.bg_search)
        passage = soup.find("div", {"class": re.compile("passage-content")})

        # Remove heading and publisher blocks when present.
        for name in "heading passage-class-0", "publisher-info-bottom":
            junk = passage.find("div", name)
            if junk is not None:
                junk.extract()

        paragraphs = [para.renderContents() for para in passage("p")]
        res = decode(" ".join(paragraphs), "utf-8")

        # Convert superscript verse markers to unicode.
        match = self.sup_re.search(res)
        while match:
            res = res.replace(match.group(0), superscript(match.group(1)))
            match = self.sup_re.search(res)

        # XXX this is like this for a reason
        res = strip_html(res).replace(u"\xa0", u" ")
        while u"  " in res:
            res = res.replace(u"  ", u" ")
        return res.strip()
Exemple #21
0
 def get_title(self, url):
     """Return the title of the page at *url*, or *url* itself on failure.

     Reads up to 2048 bytes through ``self.ua.open`` and extracts the first
     group matched by ``self.title``.  This is a best-effort lookup: any
     ordinary error falls back to the URL.  KeyboardInterrupt and
     SystemExit are deliberately left to propagate.
     """
     try:
         html = self.ua.open(url, size=2048)
         title = strip_html(self.title.search(html).group(1))
     except Exception:
         title = url
     return title
Exemple #22
0
    def lookup_verse(self, query, book=None):
        """Look up the verse(s) named by *query* in the given bible version.

        *book* defaults to ``self.DEFAULT_BIBLE``; a version missing from
        ``self.bibles`` yields a fixed complaint string.  The passage
        paragraphs are joined, verse markers matched by ``self.sup_re`` are
        replaced through ``superscript``, markup is stripped and spacing is
        normalized.
        """
        if book is None:
            book = self.DEFAULT_BIBLE
        elif book not in self.bibles:
            return u'Unknown bible.. why do you hate god so much?'
        opts = {'search': query, 'version': book}
        soup = getsoup(self.bg_search, opts, referer=self.bg_search)
        passage = soup.find('div', 'passage-wrap')

        # The heading and publisher blocks are optional; only remove the
        # ones that exist rather than failing on a missing block.
        for name in 'heading passage-class-0', 'publisher-info-bottom':
            junk = passage.find('div', name)
            if junk is not None:
                junk.extract()

        response = [para.renderContents() for para in passage('p')]
        res = decode(' '.join(response), 'utf-8')

        # convert superscript verse markers to unicode
        while True:
            match = self.sup_re.search(res)
            if not match:
                break
            res = res.replace(match.group(0), superscript(match.group(1)))

        # XXX this is like this for a reason
        res = strip_html(res).replace(u'\xa0', u' ')
        while u'  ' in res:
            res = res.replace(u'  ', u' ')
        res = res.strip()
        return res
Exemple #23
0
 def response(self, nick, args, kwargs):
     """Return one entry from the source named by ``args[0]``.

     The optional ``args[1]`` selects the lookup mode: a non-zero integer
     fetches that entry through ``source.bynum``, any other non-empty text
     is sent as a search through ``source.search``, and nothing (or zero)
     fetches ``source.random``.  One entry matched by ``source.entries`` is
     picked at random and returned with HTML and blank lines removed.
     """
     source = self.sources[args[0]]

     # Only a missing second argument is tolerated here.
     try:
         query = args[1]
     except LookupError:
         query = None

     # A query that parses as an integer is an entry number, not a search.
     try:
         num = int(query)
         query = None
     except (TypeError, ValueError):
         num = None

     if num:
         url = source.bynum.replace(u'num', unicode(num))
         opts = None
     elif query:
         url = source.search
         opts = dict(source.opts)
         opts[source.query] = query
     else:
         url = source.random
         opts = None

     doc = geturl(url, opts=opts)
     entries = source.entries.findall(doc)
     if query:
         # Kept as a real list: random.choice needs a sequence.
         entries = [entry for entry in entries if entry]
     entry = random.choice(entries)
     lines = strip_html(entry).strip().splitlines()
     return '\n'.join([line for line in lines if line])
 def sunrise_sunset(self, query, location):
     """Ask Google for the sunrise or sunset time at *location*.

     Searches for ``<query> in <location>``, finds the image whose ``src``
     matches ``self.sun_re`` and reads the two table cells that follow it.
     Returns ``"<first cell> (<second cell>)"`` with markup stripped and
     whitespace collapsed by ``self.whitespace_re``.
     """
     doc = self.ua.open(self.search, {'q': '%s in %s' % (query, location)})
     image = BeautifulSoup(doc).find('img', src=self.sun_re)
     first_cell = image.findNext('td')
     second_cell = first_cell.findNext('td')
     text = u'%s (%s)' % (self.decode(first_cell), self.decode(second_cell))
     result = strip_html(text)
     return self.whitespace_re.sub(u' ', result.strip())
Exemple #25
0
 def response(self, nick, args, kwargs):
     """Return the FAIL / FIX pair scraped from ``self.url``.

     The text comes from the first ``h1`` on the page and the next ``h1``
     after it, each reduced to the first group of ``self.fail_re``.  The
     labels are colored through ``self.col``; the whole message is stripped
     of HTML and every ``self.spaces_re`` match becomes a single space.
     """
     fail = BeautifulSoup(self.geturl(self.url)).h1
     fail_label = self.col('red', text='FAIL')
     fail_text = self.fail_re.search(fail.renderContents()).group(1)
     fix_label = self.col('green', text='FIX')
     fix_heading = fail.findNext('h1')
     fix_text = self.fail_re.search(fix_heading.renderContents()).group(1)
     message = u'%s: %s: %s %s: %s' % (
         nick, fail_label, fail_text, fix_label, fix_text)
     return self.spaces_re.sub(' ', strip_html(message))
Exemple #26
0
 def clock(self, query):
     """Use Google to look up the time in a given location.

     Searches for ``time in <query>``, takes the ``li`` whose class matches
     ``obcontainer``, removes its nested tables and returns the remaining
     text with whitespace runs collapsed to single spaces.
     """
     soup = BeautifulSoup(
         self.ua.open(self.search, {'q': 'time in %s' % query}))
     container = soup.find('li', attrs={'class': re.compile('obcontainer')})
     for subtable in container.findAll('table'):
         subtable.extract()
     text = strip_html(self.decode(container).strip())
     return re.sub(r'\s{2,}', ' ', text).strip()
Exemple #27
0
    def response(self, nick, args, kwargs):
        """Fetch song lyrics from songmeanings.com via a Google search.

        ``args[0]`` is the search query.  The Google results are scanned for
        the first redirect link that points either at a ``/songs/view/<n>``
        page or at a profile comments page carrying a "Direct link to
        comment" anchor; that target becomes the lyrics page.

        Returns the bracketed title followed by every non-blank lyric line,
        joined with newlines.  Returns ``self.error`` formatted together
        with ``kwargs`` when the page cannot be fetched or parsed, and
        ``None`` when no usable search result is found.
        """
        url = urlunparse(('https', 'www.google.com', 'search', '',
                          urlencode({
                              'num': '100',
                              'safe': 'off',
                              'hl': 'en',
                              'q': 'site:songmeanings.com ' + args[0]
                          }), ''))
        soup = getsoup(url)
        new = None
        for h3 in soup.findAll('h3', attrs={'class': 'r'}):
            uri = urlparse(h3.a['href'])
            if uri.path != '/url':
                continue
            # Google wraps result links; the real target is the q parameter.
            url = dict(parse_qsl(uri.query))['q']
            uri = urlparse(url)
            if re.search(r'/songs/view/\d+', uri.path) is not None:
                new = urlunparse(uri._replace(query='', fragment=''))
                break
            elif re.search(
                    r'/profiles/(submissions|interaction)/\d+/comments',
                    uri.path) is not None:
                # Read the anchor directly instead of iterating over its
                # children; a page without the anchor is simply skipped
                # rather than raising TypeError on None.
                link = getsoup(url).find('a', title='Direct link to comment')
                if link is not None:
                    new = urlunparse(
                        urlparse(link['href'])._replace(fragment='',
                                                        query=''))
            if new:
                break

        if not new:
            return None

        url = new
        try:
            soup = getsoup(url)
            try:
                title = re.sub(r'\s+Lyrics\s+\|\s+SongMeanings.*$', '',
                               soup.title.renderContents())
            except StandardError:
                # A missing title is not fatal; use a placeholder instead.
                title = 'Unknown artist/song, check parsing code!'
            text = soup.find('div',
                             attrs={'class': re.compile(r'.*lyric-box.*')})
            # Drop the anchors inside the lyric box before rendering it.
            for a in text('a'):
                a.extract()
        except StandardError:
            self.log.warn(
                'unable to find textblock from url {0!r} (query: {1!r})'.
                format(url, args[0]))
            return u'{nick}: {error}'.format(error=self.error, **kwargs)

        try:
            lyrics = decode(text.renderContents(), 'utf-8')
            header = ['[{}]'.format(title)]
            stripped = [line.strip()
                        for line in strip_html(lyrics).splitlines()]
            return u'\n'.join(header + [line for line in stripped if line])
        except StandardError:
            self.log.exception(
                'error parsing lyrics for query: {0!r}'.format(args[0]))
            return u'{nick}: {error}'.format(error=self.error, **kwargs)
 def response(self, nick, args, kwargs):
     """Check the spelling of ``args[0]`` using Google's suggestion link.

     Returns ``nick: <suggestion>`` when the result page contains an anchor
     with class ``spell``, otherwise ``nick: spelled correctly``.
     """
     opts = {'hl': 'en', 'aq': 'f', 'safe': 'off', 'q': args[0]}
     soup = getsoup(self.google_search, opts, referer=self.google_url)
     suggestion = soup.body.find('a', 'spell')
     if not suggestion:
         res = u'spelled correctly'
     else:
         rendered = suggestion.renderContents()
         res = strip_html(rendered.decode('utf-8', 'ignore'))
     return u'%s: %s' % (nick, res)
 def lookup(self, term, idx=1):
     """Look up *term* in the dictionary and return the page as plain text.

     The lower-cased, URL-quoted term is joined onto ``self.define_url``.
     Line-break tags are removed, markup is stripped, and non-breaking
     spaces and newlines become spaces before ``self.whitespace_re``
     matches are replaced with single spaces.  *idx* is accepted but not
     used here.
     """
     url = urljoin(self.define_url, quote(term.lower()))
     soup = getsoup(url, referer=self.base_url)
     for br in soup('br'):
         br.extract()
     rendered = soup.renderContents().decode('utf-8')
     text = strip_html(rendered)
     text = text.replace(u'\xa0', ' ')
     text = text.replace('\n', ' ')
     return self.whitespace_re.sub(' ', text).strip()
def normalize(name):
    """Return a canonical form of a city name for comparison.

    Markup is stripped, matches of the module-level ``year`` pattern are
    removed and matches of ``badchars`` become spaces. The result is then
    lowercased and trimmed, and every match of ``whitespace`` is replaced
    by a single space.
    """
    cleaned = badchars.sub(u' ', year.sub(u'', strip_html(name)))
    return whitespace.sub(u' ', cleaned.lower().strip())
Exemple #31
0
 def lookup(self, term, idx=1):
     """Return the text of the definition page for ``term`` on one line.

     The page is requested from ``self.define_url`` joined with the
     lowercased, URL-quoted term. ``br`` tags are dropped before the
     markup is stripped; non-breaking spaces, newlines and matches of
     ``self.whitespace_re`` are each replaced by a space. ``idx`` is not
     used in this method.
     """
     target = urljoin(self.define_url, quote(term.lower()))
     document = getsoup(target, referer=self.base_url)
     for tag in document('br'):
         tag.extract()
     flattened = strip_html(decode(document.renderContents(), 'utf-8'))
     flattened = flattened.replace(u'\xa0', ' ')
     flattened = flattened.replace('\n', ' ')
     return self.whitespace_re.sub(' ', flattened).strip()
Exemple #32
0
 def response(self, nick, args, kwargs):
     """Reply to ``nick`` with a spelling correction for ``args[0]``.

     Searches via ``self.google_search`` and looks in the body for a link
     whose ``href`` matches ``^/search.*spell=1``. The link's contents,
     with markup stripped, are the correction; when no such link exists
     the query is reported as (probably) spelled correctly.
     """
     params = {'hl': 'en', 'safe': 'off', 'q': args[0]}
     page = self.getsoup(self.google_search, params,
                         referer=self.google_url)
     correction_link = page.body.find(
         'a', href=re.compile(r'^/search.*spell=1'))
     if not correction_link:
         verdict = u'spelled correctly. probably.'
     else:
         verdict = strip_html(
             decode(correction_link.renderContents(), 'utf-8'))
     return u'%s: %s' % (nick, verdict)
 def rate_rt(self, name):
     """Look up a movie's critic rating via ``self.rt_search``.

     Returns a ``(title, rating)`` tuple. When the response has no
     ``movie_results_ul`` list, the title and rating are read from the
     page itself. Otherwise the list items are scanned for the first one
     whose normalized title equals the normalized ``name``; if nothing
     matches, the method falls off the end and returns ``None``.
     """
     page = getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
     wanted = self.normalize(name)
     listing = page.find('ul', id='movie_results_ul')

     if listing is None:
         score = page.find('span', id='all-critics-meter').renderContents() + '%'
         heading = page.find('h1', 'movie_title').renderContents()
         return strip_html(heading.encode('utf-8', 'ignore')).strip(), score

     for entry in listing('li'):
         try:
             score = strip_html(
                 entry.find('span', 'tMeterScore').renderContents()).strip()
             link = entry.find('div', 'media_block_content').h3.a
             found = strip_html(link.renderContents()).strip()
             if wanted == self.normalize(found):
                 return found, score
         except AttributeError:
             # an item lacking the expected elements is skipped
             pass
Exemple #34
0
 def rate_imdb(self, name):
     """Return ``(title, rating)`` for ``name`` using ``self.imdb_search``.

     If the search response is titled ``Find - IMDb`` (a result listing),
     the link in the first row of the ``findList`` table is followed.
     The rating is the contents of the ``ratingValue`` span and the title
     is the page title with `` - IMDb`` removed and markup stripped.
     """
     search_params = {'s': 'tt', 'q': name, 'exact': 'true'}
     soup = BeautifulSoup(
         geturl(self.imdb_search, search_params, referer=self.imdb_url))

     if soup.title.renderContents() == 'Find - IMDb':
         first_row = soup.body.find('table', 'findList').tr
         href = first_row.find('td', 'result_text').a['href']
         movie_page = geturl(urljoin(self.imdb_url, href),
                             referer=self.imdb_search)
         soup = BeautifulSoup(movie_page)

     rating = soup.find('span', itemprop='ratingValue').renderContents()
     page_title = soup.title.renderContents().replace(' - IMDb', '')
     return strip_html(page_title), rating
Exemple #35
0
 def response(self, nick, args, kwargs):
     """Build a one-line ``FAIL ... FIX ...`` reply from ``self.url``.

     The first ``h1`` of the page supplies the FAIL text and the next
     ``h1`` after it supplies the FIX text; each is taken from group 1 of
     ``self.fail_re``. Markup is stripped from the assembled line and
     every match of ``self.spaces_re`` is replaced by a single space.
     """
     first_heading = BeautifulSoup(geturl(self.url)).h1
     fail_label = self.col('red', text='FAIL')
     fail_text = self.fail_re.search(
         first_heading.renderContents()).group(1)
     fix_label = self.col('green', text='FIX')
     second_heading = first_heading.findNext('h1')
     fix_text = self.fail_re.search(
         second_heading.renderContents()).group(1)
     line = u'%s: %s: %s %s: %s' % (
         nick, fail_label, fail_text, fix_label, fix_text)
     return self.spaces_re.sub(' ', strip_html(line))
Exemple #36
0
 def forecast(self, location):
     """Return the forecast text for ``location``.

     The response from ``self.forecast_url`` is parsed as XML and the
     text of its first ``fcttext`` element is returned with markup
     stripped.

     Any failure (network, XML parsing, missing element) is logged and
     reported as an error message string rather than raised, so callers
     always receive a string.
     """
     try:
         page = geturl(url=self.forecast_url, opts={u'query': location}).encode('utf-8')
         xml = ElementTree.fromstring(page)
         text = strip_html(xml.find('.//fcttext').text)
     except Exception as e:
         # deliberate catch-all: a failed lookup must not propagate
         self.log.warn(u'error in module %s' % self.__module__)
         self.log.exception(e)
         return "error looking up forecast for location: %s" % location
     # previously the computed text was discarded and None was returned
     return text
Exemple #37
0
 def forecast(self, location):
     """Return the forecast text for ``location``.

     Requests ``self.forecast_url`` with the location as the ``query``
     option, parses the response as XML and returns the text of the first
     ``fcttext`` element with markup stripped.

     Failures of any kind are logged and turned into an error message
     string instead of an exception.
     """
     try:
         page = geturl(url=self.forecast_url, opts={u'query': location}).encode('utf-8')
         xml = ElementTree.fromstring(page)
         text = strip_html(xml.find('.//fcttext').text)
     except Exception as e:
         # deliberate catch-all: report the failure as text, never raise
         self.log.warn(u'error in module %s' % self.__module__)
         self.log.exception(e)
         return "error looking up forecast for location: %s" % location
     # hand the forecast back; without this the success path yields None
     return text
 def clock(self, query):
     """Use google to look up the time in the location ``query``.

     Opens ``self.search`` with the query ``time in <query>`` and returns
     the stripped text of the ``font-size:medium`` cell inside the
     ``obcontainer`` table. Any error propagates to the caller unchanged.
     """
     response = self.ua.open(self.search, {'q': 'time in %s' % query})
     container = BeautifulSoup(response).find('table', 'obcontainer')
     cell = container.find('td', style='font-size:medium')
     return strip_html(cell.renderContents().decode('utf-8')).strip()
Exemple #39
0
 def clock(self, query):
     """Use google to look up the time in the location ``query``.

     Opens ``self.search`` with the query ``time in <query>``, finds the
     ``font-size:medium`` cell inside the ``obcontainer`` div, decodes it
     with ``self.decode`` and returns the trimmed text with markup
     stripped. Any error propagates to the caller unchanged.
     """
     response = self.ua.open(self.search, {'q': 'time in %s' % query})
     container = BeautifulSoup(response).find('div', 'obcontainer')
     cell = container.find('td', style='font-size:medium')
     return strip_html(self.decode(cell).strip())
Exemple #40
0
 def forecast(self, location):
     """Return the forecast text for ``location``.

     The response from ``self.forecast_url`` is parsed as XML and the
     text of its first ``fcttext`` element is returned with markup
     stripped.

     Any failure is logged and reported as an error message string, so
     the caller always gets a string back.
     """
     try:
         page = geturl(url=self.forecast_url, opts={u"query": location}).encode("utf-8")
         xml = ElementTree.fromstring(page)
         text = strip_html(xml.find(".//fcttext").text)
     except Exception as e:
         # deliberate catch-all: a failed lookup is reported, not raised
         self.log.warn(u"error in module %s" % self.__module__)
         self.log.exception(e)
         return "error looking up forecast for location: %s" % location
     # return the result; it was previously computed and then dropped
     return text
Exemple #41
0
 def extract_quote(self, obj):
     """Return the text of the ``li`` found in ``obj`` as a single line.

     The children of the ``li`` are rendered to unicode and joined with
     spaces, markup is stripped, and matches of the module-level
     ``_linebreak`` and ``_whitespace`` patterns are replaced by a space
     before the result is trimmed.
     """
     pieces = (unicode(child) for child in obj.find(u'li').contents)
     text = strip_html(u' '.join(pieces))
     for pattern in (_linebreak, _whitespace):
         text = pattern.sub(u' ', text)
     return text.strip()
Exemple #42
0
 def sunrise_sunset(self, query, location):
     """Ask google for ``query`` (sunrise or sunset) at ``location``.

     Finds the image whose ``src`` matches ``self.sun_re`` and reads the
     two ``td`` cells that follow it. The result is formatted as
     ``<first> (<second>)`` with markup stripped, trimmed, and with every
     match of ``self.whitespace_re`` replaced by a single space.
     """
     search_terms = {'q': '%s in %s' % (query, location)}
     page = BeautifulSoup(self.ua.open(self.search, search_terms))
     first_cell = page.find('img', src=self.sun_re).findNext('td')
     second_cell = first_cell.findNext('td')
     combined = u'%s (%s)' % (self.decode(first_cell),
                              self.decode(second_cell))
     return self.whitespace_re.sub(u' ', strip_html(combined).strip())
 def translate(self, text, src, dst):
     """Translate ``text`` from language ``src`` to language ``dst``.

     ``src`` and ``dst`` are keys into ``self.langs``. The translated
     text (markup stripped) is returned; when the response names a
     detected source language that ``self.lookup`` knows, the result is
     prefixed with ``[detected <Language>]``.
     """
     request = {
         'langpair': '%s|%s' % (self.langs[src], self.langs[dst]),
         'v': '1.0',
         'q': text,
     }
     payload = simplejson.loads(geturl(self.url, request))['responseData']
     translated = strip_html(payload['translatedText'])
     try:
         return u'[detected %s] %s' % (
             self.lookup[payload['detectedSourceLanguage']].capitalize(),
             translated)
     except KeyError:
         # no (known) detected language: return the bare translation
         return translated
Exemple #44
0
 def extract_quote(self, obj):
     """Return the text of the ``li`` found in ``obj`` as a single line.

     Each child of the ``li`` is rendered to unicode; the pieces are
     joined with spaces and stripped of markup. Matches of
     ``self._linebreak`` and then ``self._whitespace`` are replaced by a
     space, and the result is trimmed.
     """
     rendered = u' '.join(unicode(node) for node in obj.find(u'li').contents)
     plain = strip_html(rendered)
     plain = self._whitespace.sub(u' ', self._linebreak.sub(u' ', plain))
     return plain.strip()
Exemple #45
0
 def response(self, nick, args, kwargs):
     """Return the text of one randomly chosen entry from ``self.random``.

     The page's ``div`` with id ``main`` is searched for ``div`` elements
     of class ``content``; one is picked at random and its ``p`` elements
     are joined with spaces, stripped of markup and trimmed.
     """
     page = BeautifulSoup(geturl(self.random, add_headers={'Accept': '*/*'}))
     container = page.find(u'div', attrs={u'id': u'main'})
     entries = container.findAll(u'div', attrs={u'class': u'content'})
     chosen = random.choice(entries)
     paragraphs = u' '.join(unicode(p) for p in chosen.findAll(u'p'))
     return strip_html(paragraphs).strip()
 def normalize(self, name):
     """Reduce a movie title to a canonical form for comparison.

     Markup is stripped first, then a fixed sequence of substitutions is
     applied, and finally the title is lowercased, trimmed and has its
     whitespace compressed via ``self.whitespace_re``.
     """
     name = strip_html(name)
     substitutions = (
         (self.year_re, ''),               # strip trailing year
         (self.rev_article_re, r'\2 \1'),  # Movie, The = The Movie
         (self.articles_re, ''),           # strip leading the/an
         (self.badchars_re, ' '),          # only allow alnum
     )
     for pattern, replacement in substitutions:
         name = pattern.sub(replacement, name)
     name = name.lower().strip()
     return self.whitespace_re.sub(' ', name)
Exemple #47
0
 def response(self, nick, args, kwargs):
     """Return the text of one randomly chosen entry from ``self.random``.

     Looks inside the ``div`` with id ``main`` for ``div`` elements of
     class ``content``, picks one at random, and returns its ``p``
     elements joined with spaces, with markup stripped and trimmed.
     """
     html = geturl(self.random, add_headers={'Accept': '*/*'})
     main_div = BeautifulSoup(html).find(u'div', attrs={u'id': u'main'})
     candidates = main_div.findAll(u'div', attrs={u'class': u'content'})
     picked = random.choice(candidates)
     rendered = []
     for paragraph in picked.findAll(u'p'):
         rendered.append(unicode(paragraph))
     return strip_html(u' '.join(rendered)).strip()
Exemple #48
0
 def response(self, nick, args, kwargs):
     """Return the text of a random ``Text-Replies`` link from the page.

     Marks the request in ``kwargs["req"]`` as quoted, then collects, for
     every ``div`` of class ``content`` in the body, the link whose
     ``href`` matches ``Text-Replies``. Each link's text is stripped of
     markup and folded onto one line; one of them is chosen at random.

     NOTE(review): ``url`` is not defined inside this method; presumably
     it is a module-level name -- confirm.
     """
     kwargs["req"].quoted = True
     page = self.getsoup(url)
     reply_href = re.compile(r"Text-Replies")
     links = [post.find("a", href=reply_href)
              for post in page.body("div", "content")]
     contents = [
         u" ".join(
             strip_html(decode(link.renderContents())).strip().splitlines())
         for link in links if link is not None
     ]
     return random.choice(contents)
Exemple #49
0
 def rate_rt_audience(self, name):
     """Look up a movie's audience rating via ``self.rt_search``.

     Returns ``(title, rating)``. When the search response has no
     ``movie_results_ul`` list, title and rating are read from the page
     itself. Otherwise the first list item whose normalized title equals
     the normalized ``name`` is followed to its own page, where the
     rating is read. Returns ``None`` when no item matches.
     """
     page = self.getsoup(self.rt_search, {'search': name}, referer=self.rt_url)
     wanted = self.normalize(name)
     listing = page.find('ul', id='movie_results_ul')

     if listing is None:
         meter = page.find(name="span", attrs={"class": "meter popcorn numeric "})
         score = meter.renderContents() + "%"
         heading = page.find('h1', 'movie_title').renderContents()
         return strip_html(encode(heading, 'utf-8')).strip(), score

     for entry in listing('li'):
         try:
             link = entry.find('div', 'media_block_content').h3.a
             found = strip_html(link.renderContents()).strip()
             if wanted != self.normalize(found):
                 continue
             href = entry.h3.a['href']
             movie_page = self.getsoup(self.rt_url + href, {}, self.rt_search, {'search': name})
             meter = movie_page.find(name="span", attrs={"class": "meter popcorn numeric "})
             return found, meter.renderContents() + "%"
         except AttributeError:
             # an item lacking the expected elements is skipped
             pass
     return None
Exemple #50
0
 def normalize(self, name):
     """Reduce a movie title to a canonical form for comparison.

     Markup is stripped, then the year, reversed-article, leading-article
     and bad-character substitutions run in that order. The title is
     lowercased and trimmed, whitespace is compressed, and finally
     matches of ``self.and_re`` are replaced by a space.
     """
     title = strip_html(name)
     title = self.year_re.sub('', title)  # strip trailing year
     title = self.rev_article_re.sub(r'\2 \1', title)  # Movie, The = The Movie
     title = self.articles_re.sub('', title)  # strip leading the/an
     title = self.badchars_re.sub(' ', title)  # only allow alnum
     title = title.lower().strip()
     title = self.whitespace_re.sub(' ', title)  # compress whitespace
     return self.and_re.sub(' ', title)  # the word "and"
Exemple #51
0
 def response(self, nick, args, kwargs):
     """Return a one-line recipe for the drink named in ``args[0]``.

     Searches ``self.search`` for the drink, follows the first hit
     (group 1 of ``self.drink``) relative to ``self.baseurl``, and
     formats the title, ingredients and instructions extracted by the
     matching patterns as ``title - ingredients - instructions`` with
     markup stripped. Any failure yields a fixed apology instead.
     """
     try:
         query = args[0]
         doc = self.geturl(self.search, opts={u'verbose': u'on', u'name': query})
         drink = self.drink.search(doc).group(1)
         url = urljoin(self.baseurl, drink)
         doc = self.geturl(url)
         title = self.title.search(doc).group(1)
         ingredients = self.ingredients.findall(doc)
         instructions = self.instructions.search(doc).group(1)
         response = strip_html(u'%s - %s - %s' % (title, u', '.join(ingredients), instructions))
     except Exception:
         # deliberate catch-all: no hit or unexpected markup means the
         # drink is treated as unknown
         response = u"That's a made-up drink, sorry."
     # NOTE(review): the reply was built but never returned; it is
     # returned as-is here -- confirm whether a nick prefix is expected.
     return response
Exemple #52
0
    def bodycount(self):
        """Return the figure scraped from ``self._bodycount_url``.

        Group 1 of ``self._bodycount_re`` is decoded as ASCII, stripped
        of markup, and has matches of ``self._re_whitespace`` replaced by
        a space before being trimmed. Any failure is logged and reported
        as ``u'UNKNOWN'`` instead of being raised.
        """
        try:
            doc = geturl(self._bodycount_url)
            data = self._bodycount_re.search(doc).group(1)
            data = decode(data, 'ascii')
            data = strip_html(data)
            data = self._re_whitespace.sub(u' ', data)
            data = data.strip()
            return data
        except Exception as error:
            # deliberate catch-all: callers always get a string back
            self.log.warn(u'error in module %s' % self.__module__)
            self.log.exception(error)
            return u'UNKNOWN'
Exemple #53
0
 def on_message(self, user, message, private, addressed, chat=None):
     """Wrap an incoming AIM message in a ``Request`` and hand it to the bot.

     Messages sent under the bot's own name are ignored. Markup is
     stripped from the message, the request is tagged with the sender,
     the ``AIM`` channel and the delivery flags, then logged, checked for
     addressing and processed by ``self.bot``.
     """
     if user.name == self.bot.botname():
         return
     request = Request(message=strip_html(message))
     fields = (
         ('nick', user.name),
         ('channel', u'AIM'),
         ('aim', self),
         ('private', private),
         ('addressed', addressed),
         ('chat', chat),
     )
     for attribute, value in fields:
         setattr(request, attribute, value)
     self.bot.log.info(u'[AIM] <%s> %s' % (request.nick, request.message))
     self.bot.check_addressing(request)
     self.bot.process_message(request)
Exemple #54
0
 def response(self, nick, args, kwargs):
     """Output the page title for a YouTube watch URL in ``args[0]``.

     The URL qualifies when its scheme is in ``SCHEMES``, the last two
     labels of its host are in ``DOMAINS``, its path ends in ``watch``
     and its query string has a ``v`` parameter. The title, with markup
     and the `` - YouTube`` suffix removed, is sent through
     ``self.bot.output``. Nothing is returned, and every error except
     ``KeyboardInterrupt``/``SystemExit`` is silently ignored.
     """
     try:
         url = args[0]
         uri = urlparse(url)
         if uri.scheme.lower() not in SCHEMES:
             return
         domain = '.'.join(uri.netloc.lower().split('.')[-2:])
         if domain not in DOMAINS:
             return
         if os.path.basename(os.path.normpath(uri.path)) != 'watch':
             return
         if 'v' not in cgi.parse_qs(uri.query):
             return
         raw_title = decode(getsoup(url).title.renderContents())
         title = strip_html(raw_title).replace(u' - YouTube', u'').strip()
         if title:
             self.bot.output(title, kwargs['req'])
     except (KeyboardInterrupt, SystemExit):
         raise
     except:
         # best effort: a URL that cannot be fetched or parsed is ignored
         pass
Exemple #55
0
    def get_summary(self, query):
        """Return a summary of the article for ``query``, capped in length.

        The result never grows past ``self.summary_size`` by appending;
        items that would exceed it are left out. Three outcomes:

        * disambiguation page: ``<title> (Disambiguation) - `` followed
          by the link titles of the first list, or a fixed fallback
          message if that layout cannot be parsed;
        * search error (``title == self.error``): a "No results" message;
        * otherwise: ``<title> -`` followed by as many leading sentences
          of the cleaned paragraph text as fit.
        """
        soup, title = self.get_soup(query)

        # Disambiguation pages have no consistent layout, so only the
        # most common one (a list of links) is attempted; anything else
        # produces a friendly failure message.
        if soup.find(u'div', attrs={u'id': u'disambig'}):
            try:
                summary = u'%s (Disambiguation) - ' % title
                for link in soup.find(u'ul').findAll(u'a'):
                    entry = unicode(link[u'title']).strip()
                    if len(summary) + len(entry) + 2 > self.summary_size:
                        break
                    separator = u'' if summary.endswith(u' ') else u', '
                    summary += separator + entry
            except:
                summary = u'Fancy, unsupported disambiguation page!'
            return summary

        # flatten the article to plain text by joining its paragraphs
        content = u' '.join(unicode(para) for para in soup.findAll(u'p'))

        # clean up the rendered text, in this order
        content = strip_html(content)  # strip markup
        cleanups = (
            (Wiki._citations, u''),    # remove citations
            (Wiki._parens, u''),       # remove parentheticals
            (Wiki._whitespace, u' '),  # compress whitespace
            (Wiki._fix_punc, r'\1'),   # fix punctuation
        )
        for pattern, replacement in cleanups:
            content = pattern.sub(replacement, content)
        content = content.strip()

        # search error
        if title == self.error:
            return u'No results found for "%s"' % query

        # append whole sentences until the next one would pass the limit
        summary = u'%s -' % title
        for sentence in Wiki._sentence.findall(content):
            if len(summary) + 1 + len(sentence) > self.summary_size:
                break
            summary += u' %s' % sentence
        return summary
Exemple #56
0
    def response(self, nick, args, kwargs):
        """Return the first ``self.joke`` match from a page as one line.

        With an empty or missing ``args[0]`` the page is
        ``self.random_url``. Otherwise the query's words are joined with
        underscores, UTF-8 encoded, URL-quoted, suffixed with ``.php``
        and resolved against ``self.baseurl``.
        """
        query = args[0]
        if query is not None and query != u'':
            slug = encode(u'_'.join(query.split()), 'utf-8')
            url = urljoin(self.baseurl, urllib.quote(slug) + u'.php')
        else:
            url = self.random_url

        joke = strip_html(self.joke.findall(geturl(url))[0])

        # the page text contains stray control characters and newlines;
        # turn them into spaces and collapse runs of whitespace
        for stray in (u'\x14', u'\n'):
            joke = joke.replace(stray, u' ')
        return re.sub(r'\s{2,}', u' ', joke).strip()
Exemple #57
0
    def response(self, nick, args, kwargs):
        """List incidents whose location matches the query in ``args[0]``.

        The query is filtered through ``self.clean`` and matched
        case-insensitively, as a literal, against the location column of
        each data row of the ``gvIncidents`` table at ``self.url``.
        Matches are returned one per line; with none, ``nick`` is told
        that no incidents were found.
        """
        needle = re.compile(re.escape(self.clean.sub(u'', args[0])), re.I)

        incidents = page_table = None
        page_table = getsoup(self.url).find('table', id='gvIncidents')
        incidents = []
        # the first row is skipped (header); each row must have 7 cells
        for row in page_table('tr')[1:]:
            cells = [strip_html(cell.renderContents()) for cell in row('td')]
            _, _num, when, kind, where, _coord, area = cells
            if needle.search(where):
                incidents.append(
                    u'=> %s: %s (%s) %s' % (when, where, area, kind))
        if not incidents:
            return u'%s: No incidents found' % nick
        return u'\n'.join(incidents)
Exemple #58
0
    def get_quote(self, symbols):
        """Look up whitespace-separated ``symbols`` and format the quotes.

        The symbols are URL-quoted, joined with ``+`` and substituted for
        ``SYMBOL`` in ``Yahoo._quote_url``. Each line of the response is
        read as one CSV record: field 0 is the symbol, 1 the name, 2 the
        last close and 3 a ``<time> - <last trade>`` pair. When the close
        cannot be parsed as a number only the last trade is shown;
        otherwise the change from the close is shown, colored red, green
        or white for a fall, rise or no change. Returns one line per
        record, joined by newlines.
        """
        quoted = [quote(symbol) for symbol in symbols.split()]
        page = geturl(Yahoo._quote_url.replace(u'SYMBOL', "+".join(quoted)))

        lines = []
        for record in page.splitlines():
            fields = next(csv.reader([record]))
            symbol, name = fields[0], fields[1]
            trade_time, last_trade = strip_html(fields[3]).split(" - ")
            if trade_time == "N/A":
                trade_time = u'market close'
            last_trade = locale.atof(last_trade)

            try:
                last_close = locale.atof(fields[2])
            except ValueError:
                # no numeric close available: report the last trade only
                lines.append(u'%s (%s) - %s: %.4f'
                             % (name, symbol, trade_time, last_trade))
                continue

            delta = last_trade - last_close
            try:
                delta_perc = delta * 100.0 / last_close
            except ZeroDivisionError:
                delta_perc = 0.00
            color = (u'red' if delta < 0
                     else u'green' if delta > 0
                     else u'white')
            text = self.colorlib.get_color(
                color,
                text=u'%.2f (%+.2f %+.2f%%)' % (last_trade, delta, delta_perc))
            lines.append(u'%s (%s) - Open: %.2f | %s: %s'
                         % (name, symbol, last_close, trade_time, text))

        return u'\n'.join(lines)
Exemple #59
0
 def rate_imdb(self, name):
     """Return ``(title, rating)`` for ``name`` using ``self.imdb_search``.

     When the search response is titled ``Find - IMDb``, the link in the
     first row of the ``findList`` table is followed. The rating is read
     from the ``ratingValue`` span and the title from the page title,
     with `` - IMDb`` removed and markup stripped.
     """
     params = {'s': 'tt', 'q': name, 'exact': 'true'}
     soup = BeautifulSoup(
         geturl(self.imdb_search, params, referer=self.imdb_url))

     on_result_listing = soup.title.renderContents() == 'Find - IMDb'
     if on_result_listing:
         result_cell = soup.body.find('table', 'findList').tr.find(
             'td', 'result_text')
         movie_url = urljoin(self.imdb_url, result_cell.a['href'])
         soup = BeautifulSoup(geturl(movie_url, referer=self.imdb_search))

     rating = soup.find('span', itemprop='ratingValue').renderContents()
     title_text = soup.title.renderContents().replace(' - IMDb', '')
     return strip_html(title_text), rating