Example #1
    def __init__(self, url):
        self.url = url

        # Fetch the page body with App Engine's urlfetch; bail out quietly on a bad URL.
        try:
            self.html = urlfetch.fetch(url).content
        except urlfetch_errors.InvalidURLError:
            return

        # Parse the fetched HTML and collect the page's rel="me" links.
        self.soup = BeautifulSoup(self.html)
        self.populate_me_links()
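
The constructor above does two jobs at once: fetch and parse. For quick reuse outside a class, the same step can be pulled into a small helper. This is only a sketch: the fetch_soup name is invented here, and it assumes the App Engine SDK and BeautifulSoup 3 are importable.

from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors
from BeautifulSoup import BeautifulSoup

def fetch_soup(url):
    # Hypothetical helper mirroring the constructor: fetch with urlfetch and
    # return a parsed tree, or None when the URL is rejected.
    try:
        html = urlfetch.fetch(url).content
    except urlfetch_errors.InvalidURLError:
        return None
    return BeautifulSoup(html)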
Example #2
    def extractLogo(self, html):
        # Needs `import re`, `import logging`, and BeautifulSoup at module level.
        soup = BeautifulSoup(html)

        # The logo is either an <img id="logo"> or a <div id="logo"> styled with
        # a CSS background image.
        img = soup.find('img', id='logo')
        div = soup.find('div', id='logo')

        description = ''

        if img:
            pic = img['src']
            # `'title' in img` tests the tag's children, not its attributes,
            # so check the attribute explicitly.
            if img.get('title'):
                description = img['title']
        elif div:
            # Pull the path out of the inline style, e.g. background: url(/logo.png).
            pic = re.search(r'(?<=url\()\S+(?=\))', str(div)).group(0)
        else:
            msg = 'Cronjob: Logos: could not find IMG or DIV tag with logo id!'
            logging.error(msg)
            print msg
            return False

        #pic = pic.decode(charset)
        return [pic, description]
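
To see the DIV branch in isolation, the regex can be exercised with a tiny standalone function. This is an illustrative sketch only: background_image_url and the sample markup are invented, and BeautifulSoup 3 is assumed.

import re
from BeautifulSoup import BeautifulSoup

def background_image_url(div_html):
    # Same pattern as above: pull the path out of a CSS url(...) in the tag's markup.
    div = BeautifulSoup(div_html).find('div', id='logo')
    match = re.search(r'(?<=url\()\S+(?=\))', str(div)) if div else None
    return match.group(0) if match else None

print background_image_url('<div id="logo" style="background: url(/img/logo.png)"></div>')
# -> /img/logo.png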
Example #3
import urlparse

from BeautifulSoup import BeautifulSoup   # BeautifulSoup 3; for bs4 use `from bs4 import BeautifulSoup`
from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors


class Page:

    def __init__(self, url):
        # Plain instance attributes; the original declared these at class level,
        # which would share the mutable lists between Page instances.
        self.url = url
        self.html = ""
        self.soup = None
        self.hcards = []
        self.me_links = []

        # Fetch the page body with App Engine's urlfetch; bail out quietly on a bad URL.
        try:
            self.html = urlfetch.fetch(url).content
        except urlfetch_errors.InvalidURLError:
            return

        self.soup = BeautifulSoup(self.html)
        self.populate_me_links()

    def populate_me_links(self):
        # Collect every rel="me" link on the page and resolve it against the page URL.
        self.me_links = [
            urlparse.urljoin(self.url, tag["href"])
            for tag in self.soup.findAll(attrs={"rel": "me"})
            if tag.get("href")
        ]
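
A minimal usage sketch for the class above; it assumes the code runs where urlfetch is available (an App Engine handler or cron job), and the URL is an arbitrary placeholder.

page = Page('http://example.com/')
if page.soup is not None:   # the constructor returns early on a bad URL
    for link in page.me_links:
        print link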