Example #1
 def response(self, nick, args, kwargs):
     try:
         # fetch the page and build a parse tree
         doc = geturl(self.random)
         soup = BeautifulSoup(doc)
         # skip the first three 'content' divs (site chrome) and pick one at random
         confs = soup.findAll('div', attrs={'class': 'content'})[3:]
         conf = random.choice(confs)
         # flatten its paragraphs into one plain-text string
         conf = ' '.join(str(p) for p in conf.findAll('p'))
         conf = stripHTML(conf).strip()
         return conf
     except Exception as e:
         log.warn('error in %s: %s' % (self.__module__, e))
         log.exception(e)
         return '%s: I had some issues with that..' % nick
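This example follows a fetch/parse/pick pattern that recurs throughout the listing: download a page, parse it, slice off the site chrome, and return one random entry as plain text. Below is a minimal standalone sketch of that same pattern; it swaps the bot's geturl and stripHTML helpers (not shown in the excerpt) for requests and bs4's get_text(), so the function name and helper choices are assumptions, not the original module's API.

    import random
    import requests
    from bs4 import BeautifulSoup

    def random_entry(url):
        # fetch and parse the page (html.parser avoids an external C dependency)
        doc = requests.get(url, timeout=10).text
        soup = BeautifulSoup(doc, 'html.parser')
        # skip the first three 'content' divs, which hold site chrome, not entries
        entries = soup.find_all('div', attrs={'class': 'content'})[3:]
        entry = random.choice(entries)
        # get_text() collapses the str()/stripHTML round trip from the original
        return ' '.join(p.get_text() for p in entry.find_all('p')).strip()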
Example #2
    def get_soup(self, query):
        if isinstance(query, (list, tuple)):
            query = u' '.join(query)

        # load page
        if query == u'random':
            opts = {}
            url = urljoin(self.base_url, self.random_path)
        else:
            opts = {u'search': query, u'go': u'Go'}
            url = urljoin(self.base_url, self.search_path)
        page = geturl(url, referer=self.base_url, opts=opts,
                      size=self.sample_size)

        # create BeautifulSoup document tree
        soup = BeautifulSoup(page)

        # extract title minus WP advert
        title = soup.title.string.replace(self.advert, u'')

        # remove all tabular data/sidebars
        for table in soup.findAll(u'table'):
            table.extract()

        # remove disambiguation links
        for dablink in soup.findAll(u'div', attrs={u'class': u'dablink'}):
            dablink.extract()

        # remove latitude/longitude metadata for places
        for coord in soup.findAll(u'span', attrs={u'id': u'coordinates'}):
            coord.extract()

        # strip non-english content wrappers
        for span in soup.findAll(u'span', attrs={u'lang': True}):
            span.extract()

        # remove IPA pronunciation guidelines
        for span in soup.findAll(u'span', attrs={u'class': u'IPA'}):
            span.extract()
        for link in soup.findAll(u'a', text=u'IPA'):
            link.extract()
        for span in soup.findAll(u'span', attrs={u'class': Wiki._audio}):
            span.extract()

        return soup, title
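For context, here is a rough sketch of how get_soup might be driven. The Wiki class name appears above (Wiki._audio), but its constructor and the attribute values below are guesses modeled on Wikipedia, not values taken from the original module.

    # Hypothetical wiring; all attribute values here are illustrative guesses
    wiki = Wiki()
    wiki.base_url = u'http://en.wikipedia.org/'
    wiki.random_path = u'wiki/Special:Random'
    wiki.search_path = u'wiki/Special:Search'
    wiki.advert = u' - Wikipedia, the free encyclopedia'
    wiki.sample_size = 32768

    # fetch a random article, or search for a specific topic
    soup, title = wiki.get_soup(u'random')
    soup, title = wiki.get_soup([u'monty', u'python'])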
Example #3
    def _getpage(self, url, opts=None):
        page = geturl(url, referer=self.baseurl, opts=opts)
        # HTMLParser doesn't handle this very well.. see:
        # http://www.crummy.com/software/BeautifulSoup/3.1-problems.html
        page = self.scripts_re.sub('', page)
        soup = BeautifulSoup(page)

        # get page title
        title = soup.title.string
        if self.advert and self.advert in title:
            title = title.replace(self.advert, '')

        # remove all tabular data/sidebars
        for table in soup.findAll('table'):
            table.extract()

        # remove disambiguation links
        for div in soup.findAll('div', 'dablink'):
            div.extract()

        # remove latitude/longitude metadata for places
        for span in soup.findAll('span', id='coordinates'):
            span.extract()

        # strip non-english content wrappers
        for span in soup.findAll('span', lang=True):
            span.extract()

        # remove IPA pronunciation guidelines
        for span in soup.findAll('span', 'IPA'):
            span.extract()
        for a in soup.findAll('a', text='IPA'):
            a.extract()
        for span in soup.findAll('span', 'audiolink'):
            span.extract()

        return soup, title
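The scripts_re attribute is not part of this excerpt. Judging from the comment and the linked BeautifulSoup 3.1 notes, it presumably strips <script> blocks that confuse the HTMLParser-based builder before the page is parsed; a plausible definition would be:

    import re

    # hypothetical definition of scripts_re (not shown in the excerpt above):
    # drop <script>...</script> blocks before handing the page to BeautifulSoup
    scripts_re = re.compile(r'<script[^>]*>.*?</script>',
                            re.IGNORECASE | re.DOTALL)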
Example #4
    def get_quote(self, symbol):
        url = Yahoo._quote_url.replace('SYMBOL', symbol)
        page = geturl(url)
        soup = BeautifulSoup(page)
        company = ' '.join([str(item) for item in soup.find('h1').contents])
        company = stripHTML(company)
        tables = soup.findAll('table')
        table = tables[0]
        rows = table.findAll('tr')
        data = {}
        current_value = 0.0
        open_value = 0.0
        for row in rows:
            key, val = row.findAll('td')
            key = str(key.contents[0])
            if key == 'Change:':
                try:
                    img = val.find('img')
                    alt = str(img['alt'])
                    val = alt + stripHTML(str(val.contents[0]))
                except Exception:
                    # no change image/alt text; treat as unchanged
                    val = '0.00%'
            elif key == 'Ask:':
                continue
            else:
                val = stripHTML(str(val.contents[0]))

            val = val.replace(',', '')
            if Yahoo._isfloat.search(val):
                val = float(val)

            data[key] = val

            if key == 'Last Trade:' or key == 'Index Value:':
                current_value = val

            elif key == 'Prev Close:':
                open_value = val

        # see if we can calculate percentage
        try:
            change = 100 * (current_value - open_value) / open_value
            data['Change:'] += ' (%.2f%%)' % change
        except Exception:
            # current/open value missing or non-numeric; skip the percentage
            pass

        # try to colorize the change field
        try:
            if 'Up' in data['Change:']:
                data['Change:'] = self._green + data['Change:'] + self._reset
            elif 'Down' in data['Change:']:
                data['Change:'] = self._red + data['Change:'] + self._reset
        except Exception:
            # 'Change:' missing or not a string; leave it uncolored
            pass

        # build friendly output
        output = []
        for key, val in data.items():
            if isinstance(val, float):
                val = '%.2f' % val
            output.append('%s %s' % (key, val))

        return '%s - ' % company + ' | '.join(output)
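A rough usage sketch follows. The Yahoo class attributes referenced above (_quote_url, _isfloat, and the _green/_red/_reset color codes) are defined elsewhere in the module; the values implied below and the sample output are illustrative guesses.

    # Hypothetical call, assuming a Yahoo instance configured elsewhere with
    # _quote_url (a URL containing the literal token SYMBOL, per the replace()
    # call above), _isfloat (a compiled regex matching numbers), and color codes
    yahoo = Yahoo()
    print(yahoo.get_quote('GOOG'))
    # e.g. "Google Inc. - Last Trade: 530.00 | Prev Close: 527.50 | Change: Up 2.50 (0.47%)"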