Example #1
0
def slutrating(phrase):
    """Score a phrase by comparing search hit counts with safe search
    off vs. on.

    Returns the fraction of results removed by safe search (0..1],
    0 when there are no results at all, or None when five attempts
    all yield a non-positive ratio.  Raises WordFiltered when the
    safe-search page reports a filtered word.
    """

    phrase = cleanurl(phrase)

    for i in range(5):  # Try up to 5 times to get a good result
        try:
            data = geturl(searchURL, opts={u'q': phrase, u'safe': u'off'})
            unsafe = int(match_re.search(data).group(1).replace(u',', u''))
        except AttributeError:
            # no result-count on the page: search() returned None
            unsafe = 0

        try:
            data = geturl(searchURL, opts={u'q': phrase, u'safe': u'active'})
            try:
                filtered = filter_re.search(data).group(1)
                raise WordFiltered(filtered)
            except AttributeError:
                # no "filtered word" notice found -- the normal case
                pass
            safe = int(match_re.search(data).group(1).replace(u',', u''))
        except AttributeError:
            safe = 0

        if not unsafe:
            if safe > 0:
                continue # shouldn't really be possible to have safe w/o unsafe
            else:
                return 0

        value = float(unsafe - safe) / float(unsafe)
        if value > 0:
            return value
    # NOTE(review): if all 5 attempts produce a non-positive ratio the
    # function falls through and implicitly returns None -- callers
    # must handle that.
Example #2
0
 def rate_imdb(self, name):
     """Get user rating from IMDB"""
     page = geturl(self.imdb_search, {'s': 'tt', 'q': name},
                   referer=self.imdb_url)
     soup = BeautifulSoup(page)
     # A "Title Search" page means we got search results rather than a
     # direct hit: locate the exact (normalized) title and follow it.
     if soup.title.renderContents() == 'IMDb Title Search':
         results = soup.body.find('div', id='main')
         wanted = self.normalize(name)
         url = None
         for para in results('p', style=None):
             if url:
                 break
             for row in para.table('tr'):
                 candidate = row('td')[2].a
                 if self.normalize(candidate.renderContents()) == wanted:
                     url = urljoin(self.imdb_url, candidate['href'])
                     break
         if url is None:
             raise ValueError('no exact matches')
         soup = BeautifulSoup(geturl(url, referer=self.imdb_search))
     # Missing rating div/bold tag raises AttributeError -> unrated.
     try:
         rating = soup.body.find('div', 'meta').b.renderContents()
     except AttributeError:
         rating = 'Unrated'
     return stripHTML(soup.title.renderContents()), rating
Example #3
0
 def get_song_for_artist(self, qsong, qartist):
     """Locate *qsong* by *qartist* and return its lyrics."""
     # Step 1: search for the artist and follow the best match.
     search_opts = dict(self.search_opts)
     search_opts['type'] = 'artist'
     search_opts['q'] = qartist
     page = geturl(self.search, opts=search_opts, referer=self.baseurl)
     found = self.result.findall(page)
     artist_link = None
     for link, artist in found:
         if normalize(artist) == normalize(qartist):
             artist_link = link
             break
     if not artist_link:
         # no exact match -- fall back to the first search result
         artist_link = found[0][0]
     # Step 2: open the artist's song index and find the song there.
     index_url = urljoin(urljoin(self.baseurl, artist_link), 'lyrics.html')
     page = geturl(index_url, referer=self.search)
     found = self.result.findall(page)
     song_link = None
     for link, song in found:
         if normalize(song.replace(' lyrics', '')) == normalize(qsong):
             song_link = link
             break
     if not song_link:
         song_link = found[0][0]
     return self.get_lyrics_from_url(urljoin(self.baseurl, song_link))
Example #4
0
 def response(self, nick, args, kwargs):
     """Report which members of the configured group are in-game or
     online; returns '<nick>: <error>' on failure."""
     try:
         group_page = geturl(self.group_url)
         # map member id -> {'name': ..., 'status': ...} (+ 'link' below)
         ids = {}
         for id, name, status in self.member_re.findall(group_page):
             ids[id] = dict(name=name, status=status)
         for id, link in self.link_re.findall(group_page):
             ids[id]['link'] = link
         ingame = []
         online = []
         for data in ids.values():
             if data['status'] == 'In-Game':
                 # fetch the member page to find out which game
                 page = geturl(data['link'])
                 try:
                     game = self.game_re.search(page).group(1).strip()
                 except AttributeError:
                     # no game name on the page
                     game = 'Non-Steam Game'
                 ingame.append('%s: %s' % (data['name'], game))
             elif data['status'] == 'Online' and self.online:
                 online.append('%s: Online' % data['name'])
         output = ingame + online
         if not output:
             if self.online:
                 message = 'Online'
             else:
                 message = 'In-Game'
             output = ['No users ' + message]
         return '\n'.join(output)
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: %s' % (nick, error)
Example #5
0
 def lookup_item(self, item):
     """Look an item up and return a colorized 'name: bonuses' line."""
     page = geturl(self._search_url, opts={u'search_text': item})
     item = item.lower()
     redirect = self._redirect_re.search(page)
     if redirect:
         # The search jumped straight to an item page; follow it.
         page = geturl(urljoin(self._base_url, redirect.group(1)))
     elif u'Search results for' in page:
         found = self._results_re.findall(page)
         if not found:
             return
         # Index results by lowercased name; prefer an exact match,
         # else fall back to the alphabetically first result.
         found = sorted((title.lower(), link) for link, title in found)
         by_name = dict(found)
         if item in by_name:
             target = by_name[item]
         else:
             target = found[0][1]
         page = geturl(target)
     bonus = u', '.join(self._bonus_re.findall(page))
     bonus = self._stat_gap_re.sub(r'\1\2', bonus)
     if not bonus:
         bonus = u'No bonuses'
     rarity, name = self._item_name_re.search(page).groups()
     color = self._rarity_colors[rarity]
     name = self.colorlib.get_color(color, text=name.replace('\\', '').strip())
     return u'%s: %s' % (name, bonus)
Example #6
0
File: area.py Project: gipi/Richie
 def response(self, nick, args, kwargs):
     """Look up city/state for an area code (args[0])."""
     try:
         # base page fetched first, result ignored -- presumably to
         # establish session state; TODO confirm
         geturl(self.baseurl)
         doc = geturl(self.searchurl, opts={'number': args[0]})
         city, state = self.city.search(doc).groups()
         # title-case each word of the city name
         city = ' '.join([x.lower().capitalize() for x in city.split()])
         return '%s: %s = %s, %s' % (nick, args[0], city, state)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return "%s: I couldn't look that up for some reason.  D:" % nick
Example #7
0
File: area.py Project: gipi/Richie
 def response(self, nick, args, kwargs):
     """Look up city/state for an area code (args[0])."""
     try:
         # base page fetched first, result ignored -- presumably to
         # establish session state; TODO confirm
         geturl(self.baseurl)
         doc = geturl(self.searchurl, opts={'number': args[0]})
         city, state = self.city.search(doc).groups()
         # title-case each word of the city name
         city = ' '.join([x.lower().capitalize() for x in city.split()])
         return '%s: %s = %s, %s' % (nick, args[0], city, state)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return "%s: I couldn't look that up for some reason.  D:" % nick
Example #8
0
File: chp.py Project: gipi/Richie
    def response(self, nick, args, kwargs):
        """Search incident listings for entries matching args[0]."""
        query = args[0]
        try:
            # strip disallowed characters, then treat the query as a
            # case-insensitive regex
            check = self.clean.sub('', query)
            check = re.compile(check, re.I)

            results = []
            doc = geturl(self.url)
            for i in self.incidents.findall(doc):
                # first cell is dropped; expect exactly 4 data cells
                data = [stripHTML(c) for c in self.data.findall(i)][1:]
                if len(data) != 4:
                    continue
                # match against the third cell (location/description)
                if check.search(data[2]):
                    results.append('=> %s: %s - %s - %s' %
                                   (data[0], data[1], data[2], data[3]))

            if len(results) > 0:
                return '\n'.join(results)
            else:
                return '%s: No incidents found' % nick

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return '%s: I failed to perform that lookup' % nick
Example #9
0
 def response(self, nick, args, kwargs):
     """Return an entry from the source named by args[0].

     Optional args[1] is either an entry number or a search query;
     with neither, a random entry is fetched.
     """
     try:
         source = self.sources[args[0]]
         try:
             query = args[1]
         except:
             query = None
         try:
             # an integer query selects an entry by number instead
             num = int(query)
             query = None
         except:
             num = None
         if num:
             url = source.bynum.replace(u'num', unicode(num))
             opts = None
         elif query:
             url = source.search
             opts = dict(source.opts)
             opts[source.query] = query
         else:
             url = source.random
             opts = None
         doc = geturl(url, opts=opts)
         entries = source.entries.findall(doc)
         if query:
             # drop empty matches from search results
             entries = filter(None, entries)
         entry = random.choice(entries)
         entry = stripHTML(entry).strip()
         return entry
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: %s' % (nick, self._error)
Example #10
0
File: chp.py Project: gipi/Richie
    def response(self, nick, args, kwargs):
        """Search incident listings for entries matching args[0]."""
        query = args[0]
        try:
            # strip disallowed characters, then treat the query as a
            # case-insensitive regex
            check = self.clean.sub('', query)
            check = re.compile(check, re.I)

            results = []
            doc = geturl(self.url)
            for i in self.incidents.findall(doc):
                # first cell is dropped; expect exactly 4 data cells
                data = [stripHTML(c) for c in self.data.findall(i)][1:]
                if len(data) != 4:
                    continue
                # match against the third cell (location/description)
                if check.search(data[2]):
                    results.append('=> %s: %s - %s - %s' % (data[0], data[1],
                        data[2], data[3]))

            if len(results) > 0:
                return '\n'.join(results)
            else:
                return '%s: No incidents found' % nick

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return '%s: I failed to perform that lookup' % nick
Example #11
0
    def response(self, nick, args, kwargs):
        """Return the latest LiveJournal entry for user args[0], or for
        a random journal when no user is given."""
        try:
            try:
                user = args[0]
            except:
                user = None

            if user is None or user == '':
                # no user supplied: pick a random journal
                doc = geturl(self.randomURL)
                user = re.search('"currentJournal": "(.*?)"', doc).group(1)

            url = urljoin(self.baseURL, '/users/%s/data/rss' % user)
            feed = rssparser.parse(url)

            # get latest entry and their homepage url
            entry = feed['items'][0]['description']
            page = feed['channel']['link']

            # strip out html
            entry = stripHTML(entry)

            # detect unusual amounts of high ascii, probably russian journal
            if isUTF8(entry):
                return '%s: Russian LJ :(' % nick

            # these can get absurdly long
            entry = entry[:self.max]

            return '%s: [%s] %s' % (nick, page, entry)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: Couldn't load the page LJ returned D:" % nick
Example #12
0
 def get_lyrics_from_url(self, url):
     """Fetch a lyrics page and return the lyrics as a list of verses.

     Each verse is one string with its lines joined by ' / ' and
     whitespace collapsed; a known spam verse is removed if present.
     """
     page = geturl(url, referer=self.baseurl)
     soup = BeautifulSoup(page)
     content = soup.find('div', attrs={'id': 'content'})
     # Remove nested markup (sub-divs, links, scripts) so only the bare
     # lyric text nodes remain.  Plain loops instead of the original
     # side-effect-only list comprehensions: extract() is called for
     # its effect, not its return value.
     for div in content.findAll('div'):
         div.extract()
     for link in content.findAll('a'):
         link.extract()
     for script in content.findAll('script'):
         script.extract()
     data = ''.join(str(line) for line in content.contents)
     data = self._newline.sub('', data)
     data = self._leadbreak.sub('', data)
     data = self._endbreak.sub('', data)
     lines = self._break.split(data)
     # Split the flat line list into verses on blank lines.
     verses = []
     while True:
         try:
             i = lines.index('')
             verse, lines = lines[:i], lines[i+1:]
             verses.append(verse)
         except ValueError:
             # no blank line left: the remainder is the last verse
             verses.append(lines)
             break
     for i, verse in enumerate(verses):
         verse = ' / '.join(verse)
         verse = whitespace.sub(' ', verse)
         verses[i] = verse
     if self._spam in verses:
         verses.remove(self._spam)
     return verses
Example #13
0
 def response(self, nick, args, kwargs):
     """Report projected 2010 Senate seat counts scraped from the page."""
     try:
         page = geturl(self._baseurl)
         try:
             score = self._score_re.search(page).group(1)
             dem = self._dem_re.search(score).groups()
             gop = self._gop_re.search(score).groups()
             # XXX diebold patch :D
             #dem, gop = (dem[0], gop[1]), (gop[0], dem[1])
             tie = None
             try:
                 tie = self._tie_re.search(score).groups()
             except AttributeError:
                 # no tie figure on the page -- optional
                 pass
         except AttributeError:
             # one of the mandatory regexes failed to match
             raise Exception(u"couldn't parse page")
         output = [self.colorize(u'blue', *dem), self.colorize(u'red', *gop)]
         if tie:
             output.append(self.colorize(u'white', *tie))
         return u'%s: Projected Senate Seats 2010: %s' % (
                 nick, u', '.join(output))
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: %s' % (nick, error)
Example #14
0
    def response(self, nick, args, kwargs):
        """Return definition number args[1] (default 1) of word args[0]."""
        word = args[0].lower()
        try:
            try:
                num = int(args[1])
            except:
                num = 1

            url = urljoin(self.base_url, word)
            doc = geturl(url)
            defs = self.re_defs.search(doc).group(1)
            defs = self.re_newline.sub('', defs)
            defs = self.re_def_break.split(defs)
            if len(defs) > 1:
                # first chunk is presumably leading junk before the
                # first definition -- TODO confirm against live page
                defs.pop(0)
            if num > len(defs):
                # out-of-range index falls back to the first definition
                num = 1
            definition = defs[num - 1]
            definition = stripHTML(definition)
            definition = self.header.sub('', definition)

            return '%s: [%s/%s] %s' % (nick, num, len(defs), definition)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: I couldn't look that up for some reason.  D:" % nick
Example #15
0
    def response(self, nick, args, kwargs):
        """Return the latest LiveJournal entry for user args[0], or for
        a random journal when no user is given."""
        try:
            try:
                user = args[0]
            except:
                user = None

            if user is None or user == '':
                # no user supplied: pick a random journal
                doc = geturl(self.randomURL)
                user = re.search('"currentJournal": "(.*?)"', doc).group(1)

            url = urljoin(self.baseURL, '/users/%s/data/rss' % user)
            feed = rssparser.parse(url)

            # get latest entry and their homepage url
            entry = feed['items'][0]['description']
            page = feed['channel']['link']

            # strip out html
            entry = stripHTML(entry)

            # detect unusual amounts of high ascii, probably russian journal
            if isUTF8(entry):
                return '%s: Russian LJ :(' % nick

            # these can get absurdly long
            entry = entry[:self.max]

            return '%s: [%s] %s' % (nick, page, entry)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: Couldn't load the page LJ returned D:" % nick
Example #16
0
    def response(self, nick, args, kwargs):
        """Return definition number args[1] (default 1) of word args[0]."""
        word = args[0].lower()
        try:
            try:
                num = int(args[1])
            except:
                num = 1

            url = urljoin(self.base_url, word)
            doc = geturl(url)
            defs = self.re_defs.search(doc).group(1)
            defs = self.re_newline.sub('', defs)
            defs = self.re_def_break.split(defs)
            if len(defs) > 1:
                # first chunk is presumably leading junk before the
                # first definition -- TODO confirm against live page
                defs.pop(0)
            if num > len(defs):
                # out-of-range index falls back to the first definition
                num = 1
            definition = defs[num - 1]
            definition = stripHTML(definition)
            definition = self.header.sub('', definition)

            return '%s: [%s/%s] %s' % (nick, num, len(defs), definition)

        except Exception, e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: I couldn't look that up for some reason.  D:" % nick
Example #17
0
File: war.py Project: gipi/Richie
 def time(self):
     """Return the time scraped from the DoomsDay page, or 'UNKNOWN'
     on any failure."""
     try:
         doc = geturl(DoomsDay._url)
         time = DoomsDay._re_time.search(doc).group(1)
         return time
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #18
0
 def time(self):
     """Return the time scraped from the DoomsDay page, or 'UNKNOWN'
     on any failure."""
     try:
         doc = geturl(DoomsDay._url)
         time = DoomsDay._re_time.search(doc).group(1)
         return time
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #19
0
 def time(self):
     """Return the time scraped from the DoomsDay page, or u'UNKNOWN'
     on any failure."""
     try:
         doc = geturl(DoomsDay._url)
         time = self._re_time.search(doc).group(1)
         return time
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'UNKNOWN'
Example #20
0
 def response(self, nick, args, kwargs):
     """Look up a drink recipe (args[0]) and return its title,
     ingredients and instructions."""
     query = args[0]
     try:
         doc = geturl(self.search, opts={u'q': query} if False else {u'verbose': u'on', u'name': query})
         drink = self.drink.search(doc).group(1)
         # follow the first matching drink link from the results
         url = urljoin(self.baseurl, drink)
         doc = geturl(url)
         title = self.title.search(doc).group(1)
         ingredients = self.ingredients.findall(doc)
         instructions = self.instructions.search(doc).group(1)
         response = u'%s: %s - %s - %s' % (
                 nick, title, u', '.join(ingredients), instructions)
         response = stripHTML(response)
         return response
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u"%s: Something ungood happened looking that up, sry" % nick
Example #21
0
 def response(self, nick, args, kwargs):
     """Look up a drink recipe (args[0]) and return its title,
     ingredients and instructions."""
     query = args[0]
     try:
         doc = geturl(self.search, opts={'verbose': 'on', 'name': query})
         drink = self.drink.search(doc).group(1)
         # follow the first matching drink link from the results
         url = urljoin(self.baseurl, drink)
         doc = geturl(url)
         title = self.title.search(doc).group(1)
         ingredients = self.ingredients.findall(doc)
         instructions = self.instructions.search(doc).group(1)
         response = '%s: %s - %s - %s' % (
             nick, title, ', '.join(ingredients), instructions)
         response = stripHTML(response)
         return response
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return "%s: Something ungood happened looking that up, sry" % nick
Example #22
0
 def level(self):
     """Return the scraped terror-alert level, colorized via colorlib;
     u'UNKNOWN' on any failure."""
     try:
         doc = geturl(Terror._url)
         level = self._re_level.search(doc).group(1)
         # level text keys the color map, case-insensitively
         color = self._color_map[level.lower()]
         return self.colorlib.get_color(color, text=level)
     except Exception, error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'UNKNOWN'
Example #23
0
 def level(self):
     """Return the scraped terror-alert level wrapped in raw IRC
     formatting codes; 'UNKNOWN' on any failure."""
     try:
         doc = geturl(Terror._url)
         level = Terror._re_level.search(doc).group(1)
         # level text keys the color map, case-insensitively
         color = Terror._color_map[level.lower()]
         # \x03<color>,1: color on black; \x16\x16: reverse-video
         # toggled twice; \x0f: reset formatting (mIRC codes)
         return '\x03%s,1\x16\x16%s\x0f' % (color, level)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #24
0
File: war.py Project: gipi/Richie
 def level(self):
     """Return the scraped terror-alert level wrapped in raw IRC
     formatting codes; 'UNKNOWN' on any failure."""
     try:
         doc = geturl(Terror._url)
         level = Terror._re_level.search(doc).group(1)
         # level text keys the color map, case-insensitively
         color = Terror._color_map[level.lower()]
         # \x03<color>,1: color on black; \x16\x16: reverse-video
         # toggled twice; \x0f: reset formatting (mIRC codes)
         return '\x03%s,1\x16\x16%s\x0f' % (color, level)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #25
0
File: movie.py Project: gipi/Richie
    def rate(self, movie):
        """Look up *movie* and return a 'Meta ...' rating summary.

        Returns a string like 'Meta [Title] - Critics: N/100,
        Users: N/10' (ratings included only when found), or None when
        the lookup fails entirely.
        """
        try:
            opts = dict(self.movie_opts)
            opts['ts'] = movie
            page = geturl(self.search, opts=opts)
            movie = normalize(movie)
            movies = self.result.findall(page)
            movies = [(path, normalize(title)) for path, title in movies]
            # prefer an exact (normalized) title match, else the first
            # search result
            url = None
            for path, title in movies:
                if title == movie:
                    url = urljoin(self.baseurl, path)
                    break
            if not url:
                url = urljoin(self.baseurl, movies[0][0])
            page = geturl(url, referer=self.search)
            # Each rating is optional: a failed regex search returns
            # None, so .group raises AttributeError.  Catch only that
            # (instead of the original bare except) so real bugs in
            # this block still surface.
            try:
                critic_rating = self.critic_rating.search(page).group(1)
                critic_rating = 'Critics: ' + critic_rating + '/100'
            except AttributeError:
                critic_rating = None
            try:
                user_rating = self.user_rating.search(page).group(1)
                user_rating = 'Users: ' + user_rating + '/10'
            except AttributeError:
                user_rating = None

            title = html_title.search(page).group(1)
            title = title.replace(': Reviews', '')

            response = 'Meta'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            ratings = [
                i for i in (critic_rating, user_rating) if i is not None
            ]
            ratings = ', '.join(ratings)
            if ratings:
                response += ' - %s' % ratings
            return response
        except Exception:
            # best-effort lookup: callers treat None as "no rating".
            # Exception (not a bare except) so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            return
Example #26
0
 def rate_rt(self, name):
     """Rating from rotten tomatoes"""
     page = geturl(self.rt_search, {'search': name}, referer=self.rt_url)
     soup = BeautifulSoup(page)
     # Locate the search-results table whose caption reads "Movies".
     movies_table = None
     for candidate in soup.body('table'):
         if candidate.caption.renderContents() == 'Movies':
             movies_table = candidate
             break
     if movies_table is None:
         raise ValueError('no movies found in search results')
     # Follow the row whose (normalized) title matches exactly.
     wanted = self.normalize(name)
     url = None
     for row in movies_table.tbody('tr'):
         link = row.a
         if self.normalize(link.renderContents()) == wanted:
             url = urljoin(self.rt_url, link['href'])
             break
     if url is None:
         raise ValueError('no exact matches')
     soup = BeautifulSoup(geturl(url, referer=self.rt_search))
     info = soup.body.find('div', 'movie_info_area')
     return stripHTML(info.h1.renderContents()), info.a['title']
Example #27
0
def get_text():
    """Return the cleaned text of one randomly chosen post."""
    soup = BeautifulSoup(geturl(url))
    posts = soup.body.findAll('div', 'post_content')
    raw = stripHTML(random.choice(posts).renderContents())
    # normalize: strip each line, drop empties, rejoin
    stripped = [ln.strip() for ln in raw.splitlines()]
    return u'\n'.join(ln for ln in stripped if ln)
Example #28
0
 def response(self, nick, args, kwargs):
     """Estimate drive time from args[0] to args[1], with per-road
     speeds appended."""
     try:
         from_loc = self.get_location_data(args[0])
         to_loc = self.get_location_data(args[1])
         # first request resolves the origin id...
         opts = {
             'city': from_loc[0],
             'main': from_loc[1],
             'cross': from_loc[2],
         }
         page = geturl(self.second_url, opts=opts, referer=self.start_url)
         origin = self.re_origin.search(page).group(1)
         # ...then the trip report is requested for origin -> dest
         opts = {
             'city': to_loc[0],
             'main': to_loc[1],
             'cross': to_loc[2],
             'origin': origin,
             'originCity': from_loc[0],
             'originMain': from_loc[1],
             'originCross': from_loc[2],
         }
         page = geturl(self.report_url, opts=opts, referer=self.second_url)
         time, miles, table = self.re_trip.search(page).groups()
         # first two rows skipped -- presumably headers; TODO confirm
         rows = self.re_rows.findall(table)[2:]
         speeds = []
         for row in rows:
             try:
                 road, speed = self.re_cells.findall(row)[:2]
                 road = self.re_tags.sub('', road)
                 road = road.replace(' ', '')
                 speed = self.re_tags.sub('', speed)
                 speed = speed.replace(' or higher', '')
                 speeds.append('%s=%s' % (road, speed))
             except:
                 # malformed row; skip it
                 continue
         speeds = ', '.join(speeds)
         return '%s: %s mins. (%s miles) [%s]' % (nick, time, miles, speeds)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return '%s: %s' % (nick, self.error)
Example #29
0
File: movie.py Project: gipi/Richie
    def rate(self, movie):
        """Look up *movie* and return a 'Meta ...' rating summary, or
        None when the lookup fails."""
        try:
            opts = dict(self.movie_opts)
            opts['ts'] = movie
            page = geturl(self.search, opts=opts)
            movie = normalize(movie)
            movies = self.result.findall(page)
            movies = [(path, normalize(title)) for path, title in movies]
            # prefer an exact (normalized) title match, else first result
            url = None
            for path, title in movies:
                if title == movie:
                    url = urljoin(self.baseurl, path)
                    break
            if not url:
                url = urljoin(self.baseurl, movies[0][0])
            page = geturl(url, referer=self.search)
            # both ratings are optional; missing ones become None
            try:
                critic_rating = self.critic_rating.search(page).group(1)
                critic_rating = 'Critics: ' + critic_rating + '/100'
            except:
                critic_rating = None
            try:
                user_rating = self.user_rating.search(page).group(1)
                user_rating = 'Users: ' + user_rating + '/10'
            except:
                user_rating = None

            title = html_title.search(page).group(1)
            title = title.replace(': Reviews', '')

            response = 'Meta'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            ratings = [i for i in (critic_rating, user_rating) if i is not None]
            ratings = ', '.join(ratings)
            if ratings:
                response += ' - %s' % ratings
            return response
        except:
            # best-effort: any failure yields None
            return
Example #30
0
 def response(self, nick, args, kwargs):
     """Estimate drive time from args[0] to args[1], with per-road
     speeds appended."""
     try:
         from_loc = self.get_location_data(args[0])
         to_loc = self.get_location_data(args[1])
         # first request resolves the origin id...
         opts = {
             'city': from_loc[0],
             'main': from_loc[1],
             'cross': from_loc[2],
         }
         page = geturl(self.second_url, opts=opts, referer=self.start_url)
         origin = self.re_origin.search(page).group(1)
         # ...then the trip report is requested for origin -> dest
         opts = {
             'city': to_loc[0],
             'main': to_loc[1],
             'cross': to_loc[2],
             'origin': origin,
             'originCity': from_loc[0],
             'originMain': from_loc[1],
             'originCross': from_loc[2],
         }
         page = geturl(self.report_url, opts=opts, referer=self.second_url)
         time, miles, table = self.re_trip.search(page).groups()
         # first two rows skipped -- presumably headers; TODO confirm
         rows = self.re_rows.findall(table)[2:]
         speeds = []
         for row in rows:
             try:
                 road, speed = self.re_cells.findall(row)[:2]
                 road = self.re_tags.sub('', road)
                 road = road.replace(' ', '')
                 speed = self.re_tags.sub('', speed)
                 speed = speed.replace(' or higher', '')
                 speeds.append('%s=%s' % (road, speed))
             except:
                 # malformed row; skip it
                 continue
         speeds = ', '.join(speeds)
         return '%s: %s mins. (%s miles) [%s]' % (nick, time, miles, speeds)
     except Exception, e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return '%s: %s' % (nick, self.error)
Example #31
0
File: movie.py Project: gipi/Richie
    def rate(self, movie):
        """Get the freshness rating of a movie.

        Returns a string like 'Freshness [Title]: <rating>', or None
        when the lookup fails entirely.
        """
        try:
            opts = {'sitesearch': 'rt', 'search': movie}
            page = geturl(self.search, opts=opts, referer=self.baseurl)
            movie = normalize(movie)
            title = html_title.search(page).group(1)
            if title == self.search_title:
                # landed on search results rather than a movie page:
                # normalize search results
                movies = self.movies.findall(page)
                movies = [(path, normalize(title)) for path, title in movies]

                # look for exact match
                url = None
                for path, title in movies:
                    if title == movie:
                        url = urljoin(self.baseurl, path)
                        break

                # no exact match, take first one
                if not url:
                    url = urljoin(self.baseurl, movies[0][0])

                # load page
                page = geturl(url, referer=self.search)

            # find rating
            title = self.movie_title.search(page).group(1)
            rating = self.rating.search(page).group(1)

            # construct response
            response = 'Freshness'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            response += ': %s' % rating
            return response

        except Exception:
            # best-effort lookup: callers treat None as "no rating".
            # Exception instead of the original bare except so
            # SystemExit/KeyboardInterrupt are not swallowed.
            return
Example #32
0
File: movie.py Project: gipi/Richie
    def rate(self, movie):
        """Get the freshness rating of a movie.

        Returns 'Freshness [Title]: <rating>', or None when the
        lookup fails.
        """
        try:
            opts={'sitesearch': 'rt', 'search': movie}
            page = geturl(self.search, opts=opts, referer=self.baseurl)
            movie = normalize(movie)
            title = html_title.search(page).group(1)
            if title == self.search_title:
                # normalize search results
                movies = self.movies.findall(page)
                movies = [(path, normalize(title)) for path, title in movies]

                # look for exact match
                url = None
                for path, title in movies:
                    if title == movie:
                        url = urljoin(self.baseurl, path)
                        break

                # no exact match, take first one
                if not url:
                    url = urljoin(self.baseurl, movies[0][0])

                # load page
                page = geturl(url, referer=self.search)

            # find rating
            title = self.movie_title.search(page).group(1)
            rating = self.rating.search(page).group(1)

            # construct response
            response = 'Freshness'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            response += ': %s' % rating
            return response

        except:
            # best-effort: any failure yields None
            return
Example #33
0
File: movie.py Project: gipi/Richie
    def rate(self, movie):
        """Get the rating for a movie.

        Returns a string like 'IMDB [Title]: <rating>/10', or None
        when the lookup fails entirely.
        """
        try:
            page = geturl(self.search, opts={'s': 'all', 'q': movie})
            movie = normalize(movie)
            title = html_title.search(page).group(1)
            if title == self.search_title:
                # landed on search results instead of a title page:
                # normalize search results
                movies = self.movies.findall(page)
                movies = [(y, z) for x, y, z in movies]
                movies = [(path, normalize(title)) for path, title in movies]
                movies = [(path, title) for path, title in movies if title]

                # see if we can find an exact match
                url = None
                for path, title in movies:
                    if title == movie:
                        url = urljoin(self.baseurl, path)
                        break

                # no exact match, take first option returned
                if not url:
                    url = urljoin(self.baseurl, movies[0][0])

                # load actual page & title
                page = geturl(url, referer=self.search)
                title = html_title.search(page).group(1)

            # get rating and generate response
            rating = self.rating.search(page).group(1)
            response = 'IMDB'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            response += ': %s/10' % rating
            return response

        except Exception:
            # best-effort lookup: callers treat None as "no rating".
            # Exception instead of the original bare except so
            # SystemExit/KeyboardInterrupt are not swallowed.
            return
Example #34
0
File: movie.py Project: gipi/Richie
    def rate(self, movie):
        """Get the rating for a movie.

        Returns 'IMDB [Title]: <rating>/10', or None when the lookup
        fails.
        """
        try:
            page = geturl(self.search, opts={'s': 'all', 'q': movie})
            movie = normalize(movie)
            title = html_title.search(page).group(1)
            if title == self.search_title:
                # normalize search results
                movies = self.movies.findall(page)
                movies = [(y, z) for x, y, z in movies]
                movies = [(path, normalize(title)) for path, title in movies]
                movies = [(path, title) for path, title in movies if title]

                # see if we can find an exact match
                url = None
                for path, title in movies:
                    if title == movie:
                        url = urljoin(self.baseurl, path)
                        break

                # no exact match, take first option returned
                if not url:
                    url = urljoin(self.baseurl, movies[0][0])

                # load actual page & title
                page = geturl(url, referer=self.search)
                title = html_title.search(page).group(1)

            # get rating and generate response
            rating = self.rating.search(page).group(1)
            response = 'IMDB'
            if normalize(title) != movie:
                response += ' [%s]' % stripHTML(title)
            response += ': %s/10' % rating
            return response

        except:
            # best-effort: any failure yields None
            return
Example #35
0
    def bodycount(self):
        """Scrape the current body-count figure and return it as text.

        Any failure (network error, regex miss) is logged and reported
        as the literal string u'UNKNOWN' instead of raising.
        """
        try:
            page = geturl(self._bodycount_url)
            count = self._bodycount_re.search(page).group(1)
            count = stripHTML(count.decode('ascii', 'replace'))
            return self._re_whitespace.sub(u' ', count).strip()
        except Exception as error:
            log.warn(u'error in module %s' % self.__module__)
            log.exception(error)
            return u'UNKNOWN'
Example #36
0
    def response(self, nick, args, kwargs):
        """Fetch a bible passage matching the query in args[0].

        Returns the cleaned verse text truncated to self.max
        characters, or an apology addressed to *nick* if anything goes
        wrong.
        """
        query = args[0]

        try:
            doc = geturl(self.passage, opts={'search': query, 'version': 31})
            verse = self.verse.search(doc).group(1)
            # strip footnotes and stray markup before flattening to text
            for cleaner in (self.footnotes, self.junk_html):
                verse = cleaner.sub('', verse)
            verse = stripHTML(verse).strip()
            return verse[:self.max]
        except Exception as e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: God didn't like that." % nick
Example #37
0
 def get_locations(self, reload=False):
     """Return the cached location mapping, scraping it on first use.

     Builds a two-level dict from the u'c'/u'm'/u'x' entries found on
     the start page (presumably country -> metro -> locations; confirm
     against the scraped site). Pass reload=True to force a re-scrape
     even when a cache exists.
     """
     if self.locs and not reload:
         return self.locs
     page = geturl(self.start_url)
     self.locs = {}
     c = m = None
     for loc_type, name in self.re_loc.findall(page):
         if loc_type == u'c':
             c = name
             self.locs.setdefault(c, {})
         elif loc_type == u'm':
             m = name
             self.locs[c].setdefault(m, [])
         elif loc_type == u'x':
             self.locs[c][m].append(name)
     return self.locs
Example #38
0
File: war.py Project: gipi/Richie
 def bodycount(self):
     """Scrape the Iraq body-count figure and return it as a string.

     Any failure is logged and reported as 'UNKNOWN' rather than
     raised to the caller.
     """
     try:
         soup = BeautifulSoup(geturl(IraqWar._bodycount_url))
         cell = soup.find('td', attrs={'class': 'main-num'})
         text = str(cell.find('a').contents[0])
         text = stripHTML(text)
         return IraqWar._re_whitespace.sub(' ', text).strip()
     except Exception as e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #39
0
 def get_locations(self, reload=False):
     """Return the cached location mapping, scraping it on first use.

     Builds a two-level dict from the 'c'/'m'/'x' entries found on the
     start page (presumably country -> metro -> locations; confirm
     against the scraped site). Pass reload=True to force a re-scrape
     even when a cache exists.
     """
     if self.locs and not reload:
         return self.locs
     page = geturl(self.start_url)
     self.locs = {}
     c = m = None
     for loc_type, name in self.re_loc.findall(page):
         if loc_type == 'c':
             c = name
             self.locs.setdefault(c, {})
         elif loc_type == 'm':
             m = name
             self.locs[c].setdefault(m, [])
         elif loc_type == 'x':
             self.locs[c][m].append(name)
     return self.locs
Example #40
0
 def bodycount(self):
     """Scrape the Iraq body-count figure and return it as a string.

     Any failure is logged and reported as 'UNKNOWN' rather than
     raised to the caller.
     """
     try:
         soup = BeautifulSoup(geturl(IraqWar._bodycount_url))
         cell = soup.find('td', attrs={'class': 'main-num'})
         text = str(cell.find('a').contents[0])
         text = stripHTML(text)
         return IraqWar._re_whitespace.sub(' ', text).strip()
     except Exception as e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return 'UNKNOWN'
Example #41
0
File: hugs.py Project: gipi/Richie
 def response(self, nick, args, kwargs):
     """Return one random entry scraped from the site's content divs.

     On any failure the error is logged and an apology addressed to
     *nick* is returned instead.
     """
     try:
         soup = BeautifulSoup(geturl(self.random))
         # skip the first three content divs (presumably page chrome,
         # not entries -- confirm against the live page)
         candidates = soup.findAll('div', attrs={'class': 'content'})[3:]
         chosen = random.choice(candidates)
         text = ' '.join(str(p) for p in chosen.findAll('p'))
         return stripHTML(text).strip()
     except Exception as e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return '%s: I had some issues with that..' % nick
Example #42
0
File: bible.py Project: gipi/Richie
    def response(self, nick, args, kwargs):
        """Fetch a bible passage matching the query in args[0].

        Returns the cleaned verse text truncated to self.max
        characters, or an apology addressed to *nick* if anything goes
        wrong.
        """
        query = args[0]

        try:
            doc = geturl(self.passage, opts={'search': query, 'version': 31})
            verse = self.verse.search(doc).group(1)
            # strip footnotes and stray markup before flattening to text
            for cleaner in (self.footnotes, self.junk_html):
                verse = cleaner.sub('', verse)
            verse = stripHTML(verse).strip()
            return verse[:self.max]
        except Exception as e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return "%s: God didn't like that." % nick
Example #43
0
 def response(self, nick, args, kwargs):
     """Return one random entry scraped from the site's content divs.

     On any failure the error is logged and an apology addressed to
     *nick* is returned instead.
     """
     try:
         soup = BeautifulSoup(geturl(self.random))
         # skip the first three content divs (presumably page chrome,
         # not entries -- confirm against the live page)
         candidates = soup.findAll('div', attrs={'class': 'content'})[3:]
         chosen = random.choice(candidates)
         text = ' '.join(str(p) for p in chosen.findAll('p'))
         return stripHTML(text).strip()
     except Exception as e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return '%s: I had some issues with that..' % nick
Example #44
0
    def get_comment(self):
        """Fetch a comment and format it as '<author@source> text'."""
        # strip high-ascii characters since the result is headed to IRC
        page = self.utf8.sub('', geturl(self.url))

        # the second row of the first table holds the entry; its cells
        # are [?, source, comment, author] (order per the scraped page)
        soup = BeautifulSoup(page)
        cells = soup.find('table').findAll('tr')[1].findAll('td')
        return '<%s@%s> %s' % (cells[3].string, cells[1].string,
                               cells[2].string)
Example #45
0
    def get_comment(self):
        """Fetch a comment and format it as '<author@source> text'."""
        # strip high-ascii characters since the result is headed to IRC
        page = self.utf8.sub('', geturl(self.url))

        # the second row of the first table holds the entry; its cells
        # are [?, source, comment, author] (order per the scraped page)
        soup = BeautifulSoup(page)
        cells = soup.find('table').findAll('tr')[1].findAll('td')
        return '<%s@%s> %s' % (cells[3].string, cells[1].string,
                               cells[2].string)
Example #46
0
 def response(self, nick, args, kwargs):
     """Return one random entry from the site's main content area.

     On any failure the error is logged and an apology addressed to
     *nick* is returned instead.
     """
     try:
         doc = geturl(self.random, add_headers={'Accept': '*/*'})
         main = BeautifulSoup(doc).find(u'div', attrs={u'id': u'main'})
         entries = main.findAll(u'div', attrs={u'class': u'content'})
         picked = random.choice(entries)
         text = u' '.join(unicode(p) for p in picked.findAll(u'p'))
         return stripHTML(text).strip()
     except Exception as error:
         log.warn(u'error in module %s' % self.__module__)
         log.exception(error)
         return u'%s: I had some issues with that..' % nick
Example #47
0
 def response(self, nick, args, kwargs):
     """Return a random piece of ASCII art, optionally from a query page.

     An empty or missing query pulls from the random page; otherwise
     the query is slugified (whitespace collapsed to underscores, then
     URL-quoted) into a page name under self.baseurl. Failures are
     logged and reported to *nick*.
     """
     query = args[0]
     if query is None or query == '':
         url = self.random_url
     else:
         slug = urllib.quote('_'.join(query.split())) + '.html'
         url = urljoin(self.baseurl, slug)
     try:
         doc = geturl(url)
         title, art = random.choice(self.artfart.findall(doc))
         return '>>> %s <<<\n%s' % (title, stripHTML(art))
     except Exception as e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return "%s: I had a problem with that, sorry." % nick
Example #48
0
File: bash.py Project: gipi/Richie
    def response(self, nick, args, kwargs):
        """Fetch a quote from one of the configured quote sources.

        args[0] names the source; args[1] (optional) is either a quote
        number or a search string. Returns the quote text, or
        '<nick>: <self._error>' on any failure (unknown source, no
        matches, network error).
        """
        try:
            source = self.sources[args[0]]

            # optional second argument: a quote number or a search term
            try:
                query = args[1]
            except IndexError:
                query = None

            # a purely numeric query selects that quote by number;
            # int(None) raises TypeError, non-numeric text ValueError
            try:
                num = int(query)
                query = None
            except (TypeError, ValueError):
                num = None

            if num:
                url = source.bynum.replace('num', str(num))
            elif query:
                url = source.search.replace('query', query)
            else:
                url = source.random

            doc = geturl(url)
            entries = source.entries.findall(doc)

            # keep only entries actually containing the search term
            if query:
                entries = [entry for entry in entries if query in entry]

            if len(entries) > 1:
                entry = random.choice(entries)
            else:
                entry = entries[0]

            return stripHTML(entry)

        except Exception as e:
            log.warn('error in %s: %s' % (self.__module__, e))
            log.exception(e)
            return '%s: %s' % (nick, self._error)
Example #49
0
    def get_quote(self, symbol):
        """Scrape the Yahoo Finance quote page for *symbol*.

        Returns a one-line summary: the company name followed by a
        '|'-separated list of the quote table's key/value fields, with
        a derived percent change appended to the 'Change:' field and
        IRC color codes applied to it.
        """
        url = Yahoo._quote_url.replace('SYMBOL', symbol)
        page = geturl(url)
        soup = BeautifulSoup(page)
        # company name is the page's first H1, flattened to plain text
        company = ' '.join([str(item) for item in soup.find('h1').contents])
        company = stripHTML(company)
        tables = soup.findAll('table')
        table = tables[0]
        rows = table.findAll('tr')
        data = {}
        current_value = 0.0
        open_value = 0.0
        for row in rows:
            key, val = row.findAll('td')
            key = str(key.contents[0])
            if key == 'Change:':
                # the direction indicator is an <img>; its alt text
                # (presumably 'Up'/'Down' -- used for colorizing below)
                # is prepended to the change value; on any parse failure
                # fall back to a neutral '0.00%'
                try:
                    img = val.find('img')
                    alt = str(img['alt'])
                    val = alt + stripHTML(str(val.contents[0]))
                except:
                    val = '0.00%'
            elif key == 'Ask:':
                continue
            else:
                val = stripHTML(str(val.contents[0]))

            # drop thousands separators so numeric fields parse as floats
            val = val.replace(',', '')
            if Yahoo._isfloat.search(val):
                val = float(val)

            data[key] = val

            # remember the values needed to derive percent change below
            if key == 'Last Trade:' or key == 'Index Value:':
                current_value = val

            elif key == 'Prev Close:':
                open_value = val

        # see if we can calculate percentage (fails harmlessly when the
        # values are missing, zero, or non-numeric)
        try:
            change = 100 * (current_value - open_value) / open_value
            data['Change:'] += ' (%.2f%%)' % change
        except:
            pass

        # try and colorize the change field for IRC output
        try:
            if 'Up' in data['Change:']:
                data['Change:'] = self._green + data['Change:'] + self._reset
            elif 'Down' in data['Change:']:
                data['Change:'] = self._red + data['Change:'] + self._reset
        except:
            pass

        # build friendly 'key value | key value' output
        output = []
        for key, val in data.items():
            if isinstance(val, float):
                val = '%.2f' % val
            output.append('%s %s' % (key, val))

        return '%s - ' % company + ' | '.join(output)
Example #50
0
    def forecast(self, location):
        """Look up current conditions for *location* on the weather site.

        Returns 'Title: key: val | key: val ...' built from the site's
        RSS conditions feed, with the temperature wrapped in IRC color
        codes by range. If the search is ambiguous and no exact city
        match is found, returns the list of candidate cities instead.
        """
        page = geturl(url=self.search, opts={'query': location},
                referer=self.baseurl)
        soup = BeautifulSoup(page)

        # disambiguation page: scan the results table for an exact
        # (case-insensitive) city match, else report all candidates
        if 'Search Results' in str(soup):
            table = soup.find('table', attrs={'class': 'boxB full'})
            rows = table.findAll('tr')
            results = []
            match = None
            for row in rows:
                cells = row.findAll('td', attrs={'class': 'sortC'})
                for cell in cells:
                    link = cell.find('a')
                    # skip cells without links and 'add favorite' links
                    if link is None or 'addfav' in str(link['href']):
                        continue
                    city = str(link.contents[0])
                    href = urljoin(self.baseurl, str(link['href']))
                    results.append(city)
                    if city.lower() == location.lower():
                        match = urljoin(self.baseurl, href)
                        break
                if match:
                    break
            if match:
                page = geturl(url=match)
                soup = BeautifulSoup(page)
            else:
                return 'Multiple results found: %s' % ', '.join(results)

        # conditions come from the page's RSS feed; split into fields
        # via self._bar, then each field into key/value via self._keyval
        rss_url = soup.find('link', attrs=self._rss_link)['href']
        rss = rssparser.parse(rss_url)
        title = str(soup.find('h1').string).strip()
        conditions = stripHTML(rss['items'][0]['description'])
        fields = self._bar.split(conditions)
        data = {}
        for field in fields:
            try:
                key, val = self._keyval.search(field).groups()
                data[key] = val
            except:
                # fields that don't match the key/value shape are dropped
                pass

        # colorize the temperature by range using IRC color codes;
        # 100F and up additionally gets an ANSI blink sequence
        try:
            temp = float(self._tempF.search(data['Temperature']).group(1))
            blink = False
            if temp < 0:
                color = 6
            elif temp >=0 and temp < 40:
                color = 2
            elif temp >= 40 and temp < 60:
                color = 10
            elif temp >= 60 and temp < 80:
                color = 3
            elif temp >= 80 and temp < 90:
                color = 7
            elif temp >= 90 and temp < 100:
                color = 5
            elif temp >= 100:
                color = 5
                blink = True
            data['Temperature'] = '\x03%s\x16\x16%s\x0F' % (color,
                    data['Temperature'])
            if blink:
                data['Temperature'] = '\x1b[5m' + data['Temperature'] + \
                        '\x1b[0m'

        except:
            # missing/unparsable temperature: leave fields uncolored
            pass

        # build 'key: value | key: value' output
        output = []
        for key, val in data.items():
            line = '%s: %s' % (key, val)
            output.append(line)

        output = ' | '.join(output)

        return '%s: %s' % (title, output)