def response(self, nick, args, kwargs):
    """Return a random confession scraped from self.random.

    On any failure, log the error and return an apology addressed to
    *nick* instead of raising, so the bot keeps running.
    """
    try:
        doc = geturl(self.random)
        soup = BeautifulSoup(doc)
        # skip the first few 'content' divs: page chrome, not confessions
        confs = soup.findAll('div', attrs={'class': 'content'})[3:]
        conf = random.choice(confs)
        # flatten the confession's paragraphs into one clean line of text
        conf = ' '.join(str(p) for p in conf.findAll('p'))
        return stripHTML(conf).strip()
    except Exception as e:
        # was `except Exception, e` -- comma syntax is gone in Python 3;
        # `as` has worked since Python 2.6
        log.warn('error in %s: %s' % (self.__module__, e))
        log.exception(e)
        return '%s: I had some issues with that..' % nick
def response(self, nick, args, kwargs):
    """Return a random confession scraped from self.random.

    On any failure, log the error and return an apology addressed to
    *nick* instead of raising, so the bot keeps running.
    """
    try:
        doc = geturl(self.random)
        soup = BeautifulSoup(doc)
        # skip the first few 'content' divs: page chrome, not confessions
        confs = soup.findAll('div', attrs={'class': 'content'})[3:]
        conf = random.choice(confs)
        # flatten the confession's paragraphs into one clean line of text
        conf = ' '.join(str(p) for p in conf.findAll('p'))
        return stripHTML(conf).strip()
    except Exception as e:
        # was `except Exception, e` -- comma syntax is gone in Python 3;
        # `as` has worked since Python 2.6
        log.warn('error in %s: %s' % (self.__module__, e))
        log.exception(e)
        return '%s: I had some issues with that..' % nick
def get_soup(self, query):
    """Fetch the wiki page for *query* (u'random' picks a random article).

    Returns a (soup, title) pair where navigation chrome, metadata and
    non-article markup have been stripped from the parse tree and the
    site's advertising suffix has been removed from the title.
    """
    if isinstance(query, (list, tuple)):
        query = u' '.join(query)

    # choose endpoint + request options, then fetch a bounded sample
    if query == u'random':
        path, opts = self.random_path, {}
    else:
        path, opts = self.search_path, {u'search': query, u'go': u'Go'}
    page = geturl(urljoin(self.base_url, path), referer=self.base_url,
                  opts=opts, size=self.sample_size)

    soup = BeautifulSoup(page)

    # page title minus the site's advertising suffix
    title = soup.title.string.replace(self.advert, u'')

    # non-content markup to drop, in the original removal order:
    # tables (infoboxes/sidebars), disambiguation links, lat/long
    # coordinates, non-english wrappers, IPA pronunciation spans
    unwanted = (
        (u'table', None),
        (u'div', {u'class': u'dablink'}),
        (u'span', {u'id': u'coordinates'}),
        (u'span', {u'lang': True}),
        (u'span', {u'class': u'IPA'}),
    )
    for tag, attrs in unwanted:
        for node in soup.findAll(tag, attrs=attrs or {}):
            node.extract()
    # IPA links and inline audio need their own match forms
    for node in soup.findAll(u'a', text=u'IPA'):
        node.extract()
    for node in soup.findAll(u'span', attrs={u'class': Wiki._audio}):
        node.extract()
    return soup, title
def _getpage(self, url, opts=None):
    """Fetch *url* and return (soup, title) with page chrome removed.

    The advertising suffix (if configured) is stripped from the title,
    and markup that is not article content is pruned from the tree.
    """
    page = geturl(url, referer=self.baseurl, opts=opts)
    # HTMLParser doesn't handle embedded scripts very well, so strip
    # them before parsing; see:
    # http://www.crummy.com/software/BeautifulSoup/3.1-problems.html
    soup = BeautifulSoup(self.scripts_re.sub('', page))

    # page title, minus the site's advertising suffix when present
    title = soup.title.string
    if self.advert and self.advert in title:
        title = title.replace(self.advert, '')

    def prune(nodes):
        # detach every matched node from the parse tree
        for node in nodes:
            node.extract()

    prune(soup.findAll('table'))                    # tabular data / sidebars
    prune(soup.findAll('div', 'dablink'))           # disambiguation links
    prune(soup.findAll('span', id='coordinates'))   # lat/long place metadata
    prune(soup.findAll('span', lang=True))          # non-english wrappers
    prune(soup.findAll('span', 'IPA'))              # pronunciation guides
    prune(soup.findAll('a', text='IPA'))
    prune(soup.findAll('span', 'audiolink'))        # inline audio links
    return soup, title
def get_quote(self, symbol):
    """Scrape a Yahoo Finance quote for *symbol*.

    Returns a single formatted line: company name followed by
    'key value' fields joined with ' | '. The change field gets a
    computed percentage and red/green colorization on a best-effort
    basis.
    """
    url = Yahoo._quote_url.replace('SYMBOL', symbol)
    page = geturl(url)
    soup = BeautifulSoup(page)
    company = ' '.join(str(item) for item in soup.find('h1').contents)
    company = stripHTML(company)
    tables = soup.findAll('table')
    table = tables[0]
    rows = table.findAll('tr')
    data = {}
    current_value = 0.0
    open_value = 0.0
    for row in rows:
        key, val = row.findAll('td')
        key = str(key.contents[0])
        if key == 'Change:':
            # direction is conveyed by an up/down arrow image's alt text
            try:
                img = val.find('img')
                alt = str(img['alt'])
                val = alt + stripHTML(str(val.contents[0]))
            except Exception:
                # was a bare `except:` which also trapped SystemExit /
                # KeyboardInterrupt; keep the best-effort fallback
                val = '0.00%'
        elif key == 'Ask:':
            continue
        else:
            val = stripHTML(str(val.contents[0]))
        # strip thousands separators, then coerce numeric fields
        val = val.replace(',', '')
        if Yahoo._isfloat.search(val):
            val = float(val)
        data[key] = val
        if key in ('Last Trade:', 'Index Value:'):
            current_value = val
        elif key == 'Prev Close:':
            open_value = val
    # see if we can calculate percentage (open may be 0/unset, change
    # may be missing or non-string -- all are expected, not errors)
    try:
        change = 100 * (current_value - open_value) / open_value
        data['Change:'] += ' (%.2f%%)' % change
    except (ZeroDivisionError, KeyError, TypeError):
        pass
    # try and colorize the change field (best effort)
    try:
        if 'Up' in data['Change:']:
            data['Change:'] = self._green + data['Change:'] + self._reset
        elif 'Down' in data['Change:']:
            data['Change:'] = self._red + data['Change:'] + self._reset
    except (KeyError, TypeError):
        pass
    # build friendly output
    output = []
    for key, val in data.items():
        if isinstance(val, float):
            val = '%.2f' % val
        output.append('%s %s' % (key, val))
    return '%s - ' % company + ' | '.join(output)