def update_entries(count=3):
    """
    Store the newest unseen feed entries of every active blog as BlogPost rows.

    For each active ``Blog`` the feed is obtained with ``blog.parse()``. Entries
    whose ``id`` is already stored as the ``uid`` of a ``BlogPost`` of that blog
    are ignored, and at most ``count`` of the remaining entries are inserted.

    Entries with an empty title, or with neither a ``date_parsed`` nor a
    ``published_parsed`` value, are skipped. A failure while inserting one
    entry is logged and does not prevent the other entries from being added.
    A ``KeyError`` raised while processing a blog is logged and processing
    moves on to the next blog; other exception types are not handled here.

    :param count: maximum number of new entries to store per blog.
    """
    from biostar.apps.planet.models import Blog, BlogPost
    from biostar.apps.util import html

    for blog in Blog.objects.filter(active=True):
        logger.info("parsing: %s: %s", blog.id, blog.title)
        try:
            # Identifiers of the entries already stored for this blog.
            seen = set(e.uid for e in BlogPost.objects.filter(blog=blog))

            # Parse the blog.
            doc = blog.parse()

            # Keep only the new posts, and only a few of them.
            entries = [e for e in doc.entries if e.id not in seen][:count]

            for r in entries:
                title = smart_text(r.title).strip()
                title = html.strip_tags(title).strip()[:200]

                # Check the title first so that an entry that would be
                # discarded anyway cannot fail on its other fields.
                if not title:
                    continue

                description = html.strip_tags(smart_text(r.description))

                stamp = r.get('date_parsed') or r.get('published_parsed')
                if not stamp:
                    # Without this guard indexing None raises a TypeError that
                    # the KeyError handler below does not catch, which would
                    # abort the update for every remaining blog.
                    logger.error("no date found for entry: %s", title)
                    continue

                # Only the first three fields (year, month, day) are used, so
                # the stored date is midnight of that day, marked as UTC.
                date = datetime(stamp[0], stamp[1], stamp[2])
                date = timezone.make_aware(date, timezone=timezone.utc)

                body = html.clean(description)[:5000]
                content = html.strip_tags(body)

                try:
                    post = BlogPost.objects.create(
                        title=title,
                        blog=blog,
                        uid=r.id,
                        content=content,
                        html=body,
                        creation_date=date,
                        link=r.link,
                    )
                except Exception as exc:
                    # Best effort: one bad entry must not stop the others.
                    logger.error(title)
                    logger.error("database error %s", exc)
                else:
                    logger.info("added: %s", post.title)

        except KeyError as exc:
            logger.error("%s", exc)
def update_entries(count=3):
    """
    Store the newest unseen feed entries of every active blog as BlogPost rows.

    For each active ``Blog`` the feed is obtained with ``blog.parse()``. Entries
    whose ``id`` is already stored as the ``uid`` of a ``BlogPost`` of that blog
    are ignored, and at most ``count`` of the remaining entries are inserted.

    Entries with an empty title, or with neither a ``date_parsed`` nor a
    ``published_parsed`` value, are skipped. A failure while inserting one
    entry is logged and does not prevent the other entries from being added.
    A ``KeyError`` raised while processing a blog is logged and processing
    moves on to the next blog; other exception types are not handled here.

    :param count: maximum number of new entries to store per blog.
    """
    from biostar.apps.planet.models import Blog, BlogPost
    from biostar.apps.util import html

    for blog in Blog.objects.filter(active=True):
        logger.info("parsing: %s: %s", blog.id, blog.title)
        try:
            # Identifiers of the entries already stored for this blog.
            seen = set(e.uid for e in BlogPost.objects.filter(blog=blog))

            # Parse the blog.
            doc = blog.parse()

            # Keep only the new posts, and only a few of them.
            entries = [e for e in doc.entries if e.id not in seen][:count]

            for r in entries:
                title = smart_text(r.title).strip()
                title = html.strip_tags(title).strip()[:200]

                # Check the title first so that an entry that would be
                # discarded anyway cannot fail on its other fields.
                if not title:
                    continue

                description = html.strip_tags(smart_text(r.description))

                stamp = r.get('date_parsed') or r.get('published_parsed')
                if not stamp:
                    # Without this guard indexing None raises a TypeError that
                    # the KeyError handler below does not catch, which would
                    # abort the update for every remaining blog.
                    logger.error("no date found for entry: %s", title)
                    continue

                # Only the first three fields (year, month, day) are used, so
                # the stored date is midnight of that day, marked as UTC.
                date = datetime(stamp[0], stamp[1], stamp[2])
                date = timezone.make_aware(date, timezone=timezone.utc)

                body = html.clean(description)[:5000]
                content = html.strip_tags(body)

                try:
                    post = BlogPost.objects.create(
                        title=title,
                        blog=blog,
                        uid=r.id,
                        content=content,
                        html=body,
                        creation_date=date,
                        link=r.link,
                    )
                except Exception as exc:
                    # Best effort: one bad entry must not stop the others.
                    logger.error(title)
                    logger.error("database error %s", exc)
                else:
                    logger.info("added: %s", post.title)

        except KeyError as exc:
            logger.error("%s", exc)
def save(self, *args, **kwargs):
    """
    Refresh the derived fields of the post and then persist it.

    On every call the rendered ``html`` is rebuilt from ``content``, markup is
    stripped from ``tag_val``, and top level posts other than questions get
    their type label appended to the tags when it is not already contained in
    them.

    For a post that has no ``id`` yet, the title, type, status, author and
    timestamps are initialised, and when the post is an answer the parent's
    last edit information is updated and the parent is saved.

    Finally the reply count is recomputed and the save is delegated to the
    base class with the arguments that were passed in.
    """
    # The rendered body is always regenerated from the raw content.
    self.html = html.parse_html(self.content)

    # Tags are meant to be added with the instance method; stripping markup
    # here is only a safety net.
    self.tag_val = html.strip_tags(self.tag_val)

    # Top level posts that are not questions also carry their type as a tag.
    if self.is_toplevel and self.type != Post.QUESTION:
        type_label = self.get_type_display()
        if type_label not in self.tag_val:
            self.tag_val = self.tag_val + "," + type_label

    # Everything in this branch runs only once, upon object creation.
    if not self.id:
        parent = self.parent

        # A post with a parent always takes over the title of that parent.
        if parent:
            self.title = parent.title

        # Only comments may be added to a parent that is an answer or a comment.
        if parent and parent.type in (Post.ANSWER, Post.COMMENT):
            self.type = Post.COMMENT

        # Choose a post type if it was left empty.
        if self.type is None:
            if parent:
                self.type = self.COMMENT
            else:
                self.type = self.FORUM

        self.lastedit_user = self.author

        if not self.status:
            self.status = Post.PENDING

        if not self.creation_date:
            self.creation_date = now()

        self.lastedit_date = self.creation_date

        # A new answer also updates the last edit information of its parent.
        if self.type == Post.ANSWER:
            parent.lastedit_date = self.lastedit_date
            parent.lastedit_user = self.lastedit_user
            parent.save()

    # Recompute the post reply count.
    self.update_reply_count()

    super(Post, self).save(*args, **kwargs)