def possible_blog_rolls(self):
    """Return canonical URLs of the candidate blog-roll links on the page.

    Candidate containers are the ``p`` and ``div`` tags in ``self.data``
    that have a ``ul`` as a direct child. Their links are gathered with
    ``self.link_lists``, flattened, de-duplicated, passed through
    ``self.reject_pop_sites`` and finally canonicalised.
    """
    # Search the whole document instead of self.data.body: some pages
    # have no body tag at all, e.g. therichgirlsareweeping.blogspot.com
    containers = []
    for tag in self.data.findAll({'p': True, 'div': True}):
        # recursive=False: only a UL directly under this tag qualifies.
        if tag.findAll('ul', recursive=False):
            containers.append(tag)

    link_groups = self.link_lists(containers)
    candidates = self.reject_pop_sites(uniq(flatten(link_groups)))
    return [canonical_url(site) for site in candidates]
def create_from_url(self, url):
    """Create a new blog from a URL and do an initial data pull for it.

    ``url`` must be a string (byte or unicode; subclasses are accepted)
    whose scheme is exactly ``http``. It is normalised with
    ``canonical_url`` before the model instance is built, fetched and
    saved.

    Returns the saved blog instance.

    Raises IllegalBlogException if ``url`` is not a string or does not
    use the ``http`` scheme.
    """
    # Derive the accepted types from literals so both byte and unicode
    # strings are covered, and use isinstance so that string subclasses
    # are not rejected the way an exact type() comparison would.
    string_types = (type('s'), type(u'u'))
    if not isinstance(url, string_types):
        raise IllegalBlogException("URL must be valid")

    # Element 0 of the parse result is the URL scheme.
    urlobj = urlparse(url)
    if urlobj[0] != 'http':
        raise IllegalBlogException("Blog must use HTTP transport")

    url = canonical_url(url)
    blog = self.model(url=url, title=None, feed=None,
                      traversable=False, rank=0.0, html=None)
    blog.fetch()
    blog.save()
    return blog
def exists(self, url):
    """Return True if a blog with the given URL exists in the database.

    The URL is normalised with ``canonical_url`` before the lookup.
    """
    # Ask the database for a count instead of calling len() on the
    # queryset, which would load every matching row just to count them.
    matches = Blog.objects.filter(url=canonical_url(url))
    return matches.count() > 0