Example #1
0
def create_importer(page):
    """Create and store an ``Importer`` holding the styles found on *page*.

    Fetches ``page.url`` and scans the returned markup for ``<link>`` and
    ``<style>`` tags:

    * For a ``<link>`` with an ``href`` whose ``rel`` is ``stylesheet`` (or
      absent), the href is resolved against ``page.url`` and appended to
      ``importer.urls`` unless its host equals the host of ``request.url``.
    * For a ``<style>`` tag, its contents are parsed, wrapped in an
      ``@media`` block when the tag has a ``media`` attribute, prefixed
      with an "Imported directly from" comment and appended to
      ``importer.style``.

    When the fetch returns status 200 the importer is stored with ``put()``
    and ``queue_import(page)`` is called; for any other status nothing is
    stored or queued.

    Args:
        page: object exposing a ``url`` attribute; it is also passed to
            ``Importer`` and ``queue_import``.

    Returns:
        None.
    """
    importer = Importer(page=page, style='')
    resp = urlfetch.fetch(page.url, deadline=10)
    if resp.status_code != 200:
        return

    soup = BeautifulSoup(resp.content)
    parser = CSSParser()
    for tag in soup.findAll(re.compile(r'^(link|style)$')):
        if tag.name == 'link':
            # A <link> without a rel attribute is treated as a stylesheet.
            if tag.get('href', None) and tag.get('rel', 'stylesheet').lower() == 'stylesheet':
                url = urljoin(page.url, tag['href'])
                # NOTE(review): `request` is not defined in this function;
                # presumably a module-level request object -- confirm.
                if urlparse(url).netloc != urlparse(request.url).netloc:
                    importer.urls.append(url)
        elif tag.name == 'style':
            media = tag.get('media', None)
            # Inline styles belong to the page itself, so the page URL is
            # their href. The previous code passed `url`, which is only
            # assigned in the <link> branch: unbound if no <link> preceded
            # this tag, otherwise a stale, unrelated stylesheet URL.
            sheet = parser.parseString(''.join(tag.contents).strip('\n'), href=page.url)
            style = sheet.cssText
            if media:
                style = '@media %s {\n%s\n}' % (media, style)
            style = '/* Imported directly from %s */\n%s\n' % (page.url, style)
            importer.style += style
    # Patch around AppEngine's frame inspection
    del parser

    importer.put()
    queue_import(page)