def create_importer(page):
    """Build an ``Importer`` for *page* from the stylesheets its HTML references.

    Fetches ``page.url`` (10 second deadline).  When the response status is
    200 the HTML is scanned for ``<link>`` and ``<style>`` tags:

    * ``<link>`` tags that have an ``href`` and whose ``rel`` (defaulting to
      ``'stylesheet'`` when absent) equals ``'stylesheet'`` are resolved
      against ``page.url``; the resulting URL is appended to
      ``importer.urls`` unless its host matches the host of ``request.url``.
    * ``<style>`` tags are parsed with ``CSSParser``, wrapped in an
      ``@media`` block when the tag has a ``media`` attribute, prefixed with
      a provenance comment, and appended to ``importer.style``.

    Afterwards ``importer.put()`` and ``queue_import(page)`` are called.

    NOTE(review): ``request`` is not a parameter; presumably it is a
    module-level/request-context object -- confirm.

    :param page: object exposing a ``url`` attribute; it is also handed to
        ``Importer`` and ``queue_import``.
    """
    importer = Importer(page=page, style='')
    resp = urlfetch.fetch(page.url, deadline=10)
    if resp.status_code == 200:
        soup = BeautifulSoup(resp.content)
        parser = CSSParser()
        for tag in soup.findAll(re.compile(r'^(link|style)$')):
            if tag.name == 'link':
                if not tag.get('href', None):
                    continue
                if tag.get('rel', 'stylesheet').lower() != 'stylesheet':
                    continue
                url = urljoin(page.url, tag['href'])
                # Skip stylesheets served from the same host as the
                # current request.
                if urlparse(url).netloc != urlparse(request.url).netloc:
                    importer.urls.append(url)
            elif tag.name == 'style':
                media = tag.get('media', None)
                css_text = ''.join(tag.contents).strip('\n')
                # Inline styles belong to the page itself, so the page URL
                # is their base href.  The previous code passed ``url``,
                # which is only bound by the <link> branch: it was either
                # undefined (UnboundLocalError) or pointed at an unrelated
                # external stylesheet.
                sheet = parser.parseString(css_text, href=page.url)
                style = sheet.cssText
                if media:
                    style = '@media %s {\n%s\n}' % (media, style)
                style = '/* Imported directly from %s */\n%s\n' % (
                    page.url, style)
                importer.style += style
        # Patch around AppEngine's frame inspection
        del parser
        # NOTE(review): the source had lost its indentation; storing and
        # queueing only after a successful fetch is the reconstructed
        # nesting -- confirm against version control.
        importer.put()
        queue_import(page)