Example #1
0
 def get_links_from_soup(self, soup, baseurl):
     """Find article links in an HTML DOM.

     Looks at every ``div`` with class ``article-preview-top`` followed by
     every ``div`` with class ``article-preview`` and takes the first ``a``
     element inside each one.

     Args:
         soup: parsed document (BeautifulSoup-style object offering
             ``head`` and ``findAll``).
         baseurl: URL used to resolve relative links. It is overridden by
             the document's ``<base href="...">`` when one is present and
             non-empty.

     Returns:
         List of strings (urls) as produced by ``url_to_absolute``, in
         document order per class group. Previews without a link, or whose
         link has no ``href`` attribute, are skipped.
     """
     # The document may lack a <head>, and a <base> element may carry only
     # a ``target`` attribute; in both cases keep the caller's base URL.
     head = soup.head
     base = head.find('base') if head is not None else None
     if base is not None and base.get('href'):
         baseurl = base['href']

     previews = (soup.findAll('div', attrs={'class': 'article-preview-top'})
                 + soup.findAll('div', attrs={'class': 'article-preview'}))

     links = []
     for preview in previews:
         anchor = preview.find('a')
         # Nothing to resolve when the preview has no anchor or the anchor
         # has no href attribute.
         if anchor is None or anchor.get('href') is None:
             continue
         links.append(url_to_absolute(anchor['href'], baseurl))
     return links
Example #2
0
 def get_links_from_soup(self, soup, baseurl):
     """Find headline links in an HTML DOM.

     Searches the ``div`` with id ``contentw`` and, for every ``h3`` inside
     it, takes the ``a`` element with class ``mainHeadline``, falling back
     to the first ``a`` of that heading when no such element exists.

     Args:
         soup: parsed document (BeautifulSoup-style object offering
             ``find``).
         baseurl: URL used to resolve relative links.

     Returns:
         List of strings (urls) as produced by ``url_to_absolute``, in
         document order. The list is empty when the page has no
         ``contentw`` container; headings without a link, or whose link has
         no ``href`` attribute, are skipped.
     """
     content = soup.find('div', attrs={'id': 'contentw'})
     if content is None:
         # Page layout without the expected container: no links to report.
         return []

     links = []
     for heading in content.findAll('h3'):
         anchor = heading.find('a', attrs={'class': 'mainHeadline'})
         if anchor is None:
             # No dedicated headline link; use the first anchor instead.
             anchor = heading.find('a')
         if anchor is None or anchor.get('href') is None:
             continue
         links.append(url_to_absolute(anchor['href'], baseurl))
     return links