def get_lyrics_from_url(self, url):
    """Fetch a lyrics page and return its text as a list of verse strings.

    The page is downloaded with ``geturl`` (sending ``self.baseurl`` as the
    referer) and the element with ``id="content"`` is located.  Nested
    ``div``, ``a`` and ``script`` elements are removed from it, every match
    of ``self._newline``, ``self._leadbreak`` and ``self._endbreak`` is
    deleted from the remaining markup, and the result is split on
    ``self._break``.  An empty piece marks the end of a verse.

    Returns a list with one string per verse.  The lines of a verse are
    joined with ' / ' and every match of the module-level ``whitespace``
    pattern is replaced by a single space.  The first verse equal to
    ``self._spam`` (if any) is dropped.
    """
    page = geturl(url, referer=self.baseurl)
    content = BeautifulSoup(page).find('div', attrs={'id': 'content'})

    # Remove nested elements, one tag type at a time (div, then a, then
    # script), before serialising what is left.
    for tag_name in ('div', 'a', 'script'):
        for node in content.findAll(tag_name):
            node.extract()

    markup = ''.join([str(node) for node in content.contents])
    for pattern in (self._newline, self._leadbreak, self._endbreak):
        markup = pattern.sub('', markup)

    # Group the pieces into verses: an empty piece closes the current verse.
    verses = []
    current = []
    for piece in self._break.split(markup):
        if piece == '':
            verses.append(current)
            current = []
        else:
            current.append(piece)
    verses.append(current)

    verses = [whitespace.sub(' ', ' / '.join(verse)) for verse in verses]

    # Drop only the first occurrence of the spam verse.
    try:
        verses.remove(self._spam)
    except ValueError:
        pass
    return verses
def bodycount(self):
    """Scrape the body-count figure from ``IraqWar._bodycount_url``.

    The page is fetched with ``geturl`` and the first ``td`` element with
    class ``main-num`` is located.  The first child of the first ``a``
    element inside it is converted to a string, passed through
    ``stripHTML``, has every match of ``IraqWar._re_whitespace`` replaced
    by a single space, and is stripped of surrounding whitespace.

    Returns the resulting string, or 'UNKNOWN' if anything goes wrong
    (the error is logged rather than raised).
    """
    try:
        doc = geturl(IraqWar._bodycount_url)
        soup = BeautifulSoup(doc)
        cell = soup.find('td', attrs={'class': 'main-num'})
        link = cell.find('a')
        text = stripHTML(str(link.contents[0]))
        text = IraqWar._re_whitespace.sub(' ', text)
        return text.strip()
    # "except X as e" is accepted by Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except Exception as e:
        log.warn('error in %s: %s' % (self.__module__, e))
        log.exception(e)
        return 'UNKNOWN'
def response(self, nick, args, kwargs):
    """Return the text of a randomly chosen entry from ``self.random``.

    The page at ``self.random`` is fetched with ``geturl``.  All ``div``
    elements with class ``content`` are collected, the first three are
    skipped, and one of the rest is picked with ``random.choice``.  Its
    ``p`` elements are joined with spaces, passed through ``stripHTML``
    and stripped of surrounding whitespace.

    Parameters:
        nick: name used to address the caller in the failure message.
        args, kwargs: accepted but not used by this method.

    Returns the entry text, or '<nick>: I had some issues with that..'
    if anything goes wrong, including there being no entry to choose
    from.  The error is logged rather than raised.
    """
    try:
        doc = geturl(self.random)
        soup = BeautifulSoup(doc)
        candidates = soup.findAll('div', attrs={'class': 'content'})[3:]
        chosen = random.choice(candidates)
        text = ' '.join([str(p) for p in chosen.findAll('p')])
        return stripHTML(text).strip()
    # "except X as e" is accepted by Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except Exception as e:
        log.warn('error in %s: %s' % (self.__module__, e))
        log.exception(e)
        return '%s: I had some issues with that..' % nick
def get_comment(self):
    """Return one comment scraped from ``self.url`` as '<author@source> comment'.

    The page is fetched with ``geturl`` and every match of ``self.utf8`` is
    removed from it.  The second row of the first ``table`` is then read:
    its cells at index 1, 2 and 3 supply the source, the comment and the
    author respectively (each taken from the cell's ``.string``).

    No errors are handled here; a page without the expected table layout
    raises whatever the failing lookup raises.
    """
    # remove high ascii since this is going to IRC
    page = self.utf8.sub('', geturl(self.url))

    # create BeautifulSoup document tree and walk to the second table row
    # (index 0 is skipped -- presumably a header row; confirm against the page)
    second_row = BeautifulSoup(page).find('table').findAll('tr')[1]
    cells = second_row.findAll('td')

    source, comment, author = cells[1].string, cells[2].string, cells[3].string
    return '<%s@%s> %s' % (author, source, comment)
def forecast(self, location):
    """Look up current conditions for ``location`` and return a summary line.

    ``self.search`` is queried with ``location``.  If the response contains
    the text 'Search Results' it is treated as a disambiguation page: the
    linked cities in the ``boxB full`` table are collected and, when one
    matches ``location`` case-insensitively, its page is fetched instead.
    Without an exact match the method returns
    'Multiple results found: <city>, <city>, ...'.

    The conditions come from the first item of the RSS feed linked from the
    page (the ``link`` element matching ``self._rss_link``).  Its description
    is split on ``self._bar`` and each piece is parsed into a key/value pair
    with ``self._keyval``; pieces that do not parse are skipped.  When a
    'Temperature' value is present and ``self._tempF`` can extract a number
    from it, the value is wrapped in control codes chosen by temperature
    band (presumably IRC colour codes), and additionally in the ANSI blink
    escape at 100 and above.

    Returns '<page h1 text>: key: val | key: val | ...'.
    """
    page = geturl(url=self.search, opts={'query': location},
                  referer=self.baseurl)
    soup = BeautifulSoup(page)

    # disambiguation page
    if 'Search Results' in str(soup):
        table = soup.find('table', attrs={'class': 'boxB full'})
        results = []
        match = None
        for row in table.findAll('tr'):
            for cell in row.findAll('td', attrs={'class': 'sortC'}):
                link = cell.find('a')
                if link is None or 'addfav' in str(link['href']):
                    continue
                city = str(link.contents[0])
                results.append(city)
                if city.lower() == location.lower():
                    # One urljoin is enough: joining an already-absolute
                    # URL against the same base returns it unchanged.
                    match = urljoin(self.baseurl, str(link['href']))
                    break
            if match:
                break
        if not match:
            return 'Multiple results found: %s' % ', '.join(results)
        page = geturl(url=match)
        soup = BeautifulSoup(page)

    rss_url = soup.find('link', attrs=self._rss_link)['href']
    rss = rssparser.parse(rss_url)
    title = str(soup.find('h1').string).strip()
    conditions = stripHTML(rss['items'][0]['description'])

    data = {}
    for field in self._bar.split(conditions):
        try:
            key, val = self._keyval.search(field).groups()
        except (AttributeError, TypeError, ValueError):
            # No match, a non-string piece, or an unexpected group count:
            # skip this piece instead of aborting the whole forecast.
            continue
        data[key] = val

    # Colouring is best-effort: leave the value untouched if there is no
    # temperature or no number can be extracted from it.
    try:
        temp = float(self._tempF.search(data['Temperature']).group(1))
    except (KeyError, AttributeError, IndexError, TypeError, ValueError):
        temp = None

    if temp is not None:
        color = None
        if temp < 0:
            color = 6
        elif temp < 40:
            color = 2
        elif temp < 60:
            color = 10
        elif temp < 80:
            color = 3
        elif temp < 90:
            color = 7
        elif temp >= 90:
            color = 5
        # A NaN matches none of the bands above and is left uncoloured.
        if color is not None:
            data['Temperature'] = '\x03%s\x16\x16%s\x0F' % (
                color, data['Temperature'])
            if temp >= 100:
                data['Temperature'] = ('\x1b[5m' + data['Temperature'] +
                                       '\x1b[0m')

    output = ' | '.join(['%s: %s' % (key, val) for key, val in data.items()])
    return '%s: %s' % (title, output)
def get_quote(self, symbol):
    """Scrape a quote page for ``symbol`` and return a one-line summary.

    The URL is built by replacing the text 'SYMBOL' in ``Yahoo._quote_url``
    with ``symbol``.  The company name is taken from the page's ``h1``; the
    figures come from the rows of the first ``table``, each of which must
    hold exactly two ``td`` cells (label, value).  The 'Ask:' row is
    skipped.  For the 'Change:' row the ``alt`` text of the cell's image is
    prefixed to the value, falling back to '0.00%' if the cell cannot be
    parsed.  Commas are removed from every value, and values matching
    ``Yahoo._isfloat`` are converted to ``float``.

    When both a current value ('Last Trade:' or 'Index Value:') and a
    'Prev Close:' value are usable, the percentage change is appended to
    the 'Change:' entry.  That entry is wrapped in ``self._green`` or
    ``self._red`` plus ``self._reset`` when it contains 'Up' or 'Down'.

    Returns '<company> - <key> <val> | <key> <val> | ...', with floats
    formatted to two decimal places.
    """
    # NOTE(review): symbol goes into the URL unescaped -- confirm callers
    # validate it before it gets here.
    url = Yahoo._quote_url.replace('SYMBOL', symbol)
    soup = BeautifulSoup(geturl(url))
    company = ' '.join([str(item) for item in soup.find('h1').contents])
    company = stripHTML(company)
    rows = soup.findAll('table')[0].findAll('tr')

    data = {}
    current_value = 0.0
    open_value = 0.0
    for row in rows:
        key, val = row.findAll('td')
        key = str(key.contents[0])
        if key == 'Ask:':
            continue
        if key == 'Change:':
            try:
                img = val.find('img')
                alt = str(img['alt'])
                val = alt + stripHTML(str(val.contents[0]))
            except Exception:
                # Best-effort parse of third-party markup; unlike a bare
                # except this still lets KeyboardInterrupt/SystemExit out.
                val = '0.00%'
        else:
            val = stripHTML(str(val.contents[0]))

        val = val.replace(',', '')
        if Yahoo._isfloat.search(val):
            val = float(val)
        data[key] = val

        if key in ('Last Trade:', 'Index Value:'):
            current_value = val
        elif key == 'Prev Close:':
            open_value = val

    # see if we can calculate percentage; skipped when a value is not
    # numeric, the previous close is zero, or there is no 'Change:' entry
    try:
        change = 100 * (current_value - open_value) / open_value
        data['Change:'] += ' (%.2f%%)' % change
    except (KeyError, TypeError, ZeroDivisionError):
        pass

    # try and colorize the change field; skipped when it is missing or
    # is not a string
    try:
        if 'Up' in data['Change:']:
            data['Change:'] = self._green + data['Change:'] + self._reset
        elif 'Down' in data['Change:']:
            data['Change:'] = self._red + data['Change:'] + self._reset
    except (KeyError, TypeError):
        pass

    # build friendly output
    output = []
    for key, val in data.items():
        if isinstance(val, float):
            val = '%.2f' % val
        output.append('%s %s' % (key, val))
    return '%s - ' % company + ' | '.join(output)