Exemple #1
0
def update_entries(count=3):
    """Import up to ``count`` new feed entries for every active blog.

    For each active ``Blog`` the feed is parsed, entries whose ``id`` is
    already stored as a ``BlogPost.uid`` for that blog are dropped, and the
    first ``count`` remaining entries are saved as ``BlogPost`` rows.

    Entries whose title is empty after tag stripping, or that carry neither
    ``date_parsed`` nor ``published_parsed``, are skipped. A failure to
    create a single post is logged and does not stop the remaining entries;
    a ``KeyError`` raised while processing a blog is logged and the next
    blog is processed.

    :param count: maximum number of new entries stored per blog in one run.
    """
    from biostar.apps.planet.models import Blog, BlogPost
    from biostar.apps.util import html

    for blog in Blog.objects.filter(active=True):
        logger.info("parsing: %s: %s", blog.id, blog.title)
        try:
            # Identifiers of the posts already stored for this blog.
            seen = {post.uid for post in BlogPost.objects.filter(blog=blog)}

            # Parse the blog
            doc = blog.parse()

            # Keep only unseen entries, and only the first few of those.
            entries = [e for e in doc.entries if e.id not in seen][:count]

            for r in entries:
                # Plain-text title, limited to 200 characters.
                title = smart_text(r.title).strip()
                title = html.strip_tags(title).strip()[:200]
                if not title:
                    continue

                # Without this guard a dateless entry raises TypeError on
                # ``None[0]``, which the ``except KeyError`` below does not
                # catch, aborting the update for every remaining blog.
                parsed = r.get('date_parsed') or r.get('published_parsed')
                if not parsed:
                    logger.warning("skipping entry without a date: %s", title)
                    continue

                # Only year, month and day are used; the time of day is dropped.
                date = datetime(parsed[0], parsed[1], parsed[2])
                date = timezone.make_aware(date, timezone=timezone.utc)

                description = html.strip_tags(smart_text(r.description))
                body = html.clean(description)[:5000]
                content = html.strip_tags(body)

                try:
                    post = BlogPost.objects.create(
                        title=title,
                        blog=blog,
                        uid=r.id,
                        content=content,
                        html=body,
                        creation_date=date,
                        link=r.link,
                    )
                except Exception as exc:
                    # Best effort: one bad entry must not block the others.
                    logger.error(title)
                    logger.error("database error %s", exc)
                else:
                    logger.info("added: %s", post.title)

        except KeyError as exc:
            logger.error("%s", exc)
def update_entries(count=3):
    """Import up to ``count`` new feed entries for every active blog.

    For each active ``Blog`` the feed is parsed, entries whose ``id`` is
    already stored as a ``BlogPost.uid`` for that blog are dropped, and the
    first ``count`` remaining entries are saved as ``BlogPost`` rows.

    Entries whose title is empty after tag stripping, or that carry neither
    ``date_parsed`` nor ``published_parsed``, are skipped. A failure to
    create a single post is logged and does not stop the remaining entries;
    a ``KeyError`` raised while processing a blog is logged and the next
    blog is processed.

    :param count: maximum number of new entries stored per blog in one run.
    """
    from biostar.apps.planet.models import Blog, BlogPost
    from biostar.apps.util import html

    for blog in Blog.objects.filter(active=True):
        logger.info("parsing: %s: %s", blog.id, blog.title)
        try:
            # Identifiers of the posts already stored for this blog.
            seen = {post.uid for post in BlogPost.objects.filter(blog=blog)}

            # Parse the blog
            doc = blog.parse()

            # Keep only unseen entries, and only the first few of those.
            entries = [e for e in doc.entries if e.id not in seen][:count]

            for r in entries:
                # Plain-text title, limited to 200 characters.
                title = smart_text(r.title).strip()
                title = html.strip_tags(title).strip()[:200]
                if not title:
                    continue

                # Without this guard a dateless entry raises TypeError on
                # ``None[0]``, which the ``except KeyError`` below does not
                # catch, aborting the update for every remaining blog.
                parsed = r.get('date_parsed') or r.get('published_parsed')
                if not parsed:
                    logger.warning("skipping entry without a date: %s", title)
                    continue

                # Only year, month and day are used; the time of day is dropped.
                date = datetime(parsed[0], parsed[1], parsed[2])
                date = timezone.make_aware(date, timezone=timezone.utc)

                description = html.strip_tags(smart_text(r.description))
                body = html.clean(description)[:5000]
                content = html.strip_tags(body)

                try:
                    post = BlogPost.objects.create(
                        title=title,
                        blog=blog,
                        uid=r.id,
                        content=content,
                        html=body,
                        creation_date=date,
                        link=r.link,
                    )
                except Exception as exc:
                    # Best effort: one bad entry must not block the others.
                    logger.error(title)
                    logger.error("database error %s", exc)
                else:
                    logger.info("added: %s", post.title)

        except KeyError as exc:
            logger.error("%s", exc)
Exemple #3
0
    def save(self, *args, **kwargs):
        """Fill in derived fields, then save through the parent class.

        On every call ``html`` is regenerated from ``content`` with
        ``html.parse_html``, markup is stripped from ``tag_val``, top-level
        posts that are not questions get their type's display name appended
        as a tag, and the reply count is recomputed.

        When the post has no ``id`` yet, the title, type, last-edit user,
        status and timestamps are initialised as well, and a new answer
        copies its last-edit date and user onto its parent and saves it.

        Positional and keyword arguments are passed on unchanged.
        """
        # Sanitize the post body.
        self.html = html.parse_html(self.content)

        # Tags are meant to be set through the instance method; stripping
        # markup here is only a safety net.
        self.tag_val = html.strip_tags(self.tag_val)

        # Top-level posts other than questions also carry their type as a tag.
        if self.is_toplevel and self.type != Post.QUESTION:
            type_tag = self.get_type_display()
            if type_tag not in self.tag_val:
                self.tag_val = self.tag_val + "," + type_tag

        is_new = not self.id
        if is_new:

            # A reply without a title of its own borrows the parent's title.
            if self.parent and not self.title:
                self.title = self.parent.title

            # Anything attached to an answer or a comment must be a comment.
            if self.parent and self.parent.type in (Post.ANSWER, Post.COMMENT):
                self.type = Post.COMMENT

            # Pick a default type when none was supplied.
            if self.type is None:
                if self.parent:
                    self.type = self.COMMENT
                else:
                    self.type = self.FORUM

            # The rest of this branch runs only once, upon object creation.
            # NOTE: a reply always ends up with its parent's title here, which
            # makes the title fallback above redundant.
            if self.parent:
                self.title = self.parent.title
            self.lastedit_user = self.author
            if not self.status:
                self.status = Post.PENDING
            if not self.creation_date:
                self.creation_date = now()
            self.lastedit_date = self.creation_date

            # A new answer bumps the last-edit stamps on its parent.
            if self.type == Post.ANSWER:
                parent = self.parent
                parent.lastedit_date = self.lastedit_date
                parent.lastedit_user = self.lastedit_user
                parent.save()

        # Recompute post reply count
        self.update_reply_count()

        super(Post, self).save(*args, **kwargs)
Exemple #4
0
    def save(self, *args, **kwargs):
        """Fill in derived fields, then save through the parent class.

        On every call ``html`` is regenerated from ``content`` with
        ``html.parse_html``, markup is stripped from ``tag_val``, top-level
        posts that are not questions get their type's display name appended
        as a tag, and the reply count is recomputed.

        When the post has no ``id`` yet, the title, type, last-edit user,
        status and timestamps are initialised as well, and a new answer
        copies its last-edit date and user onto its parent and saves it.

        Positional and keyword arguments are passed on unchanged.
        """
        # Sanitize the post body.
        self.html = html.parse_html(self.content)

        # Tags are meant to be set through the instance method; stripping
        # markup here is only a safety net.
        self.tag_val = html.strip_tags(self.tag_val)

        # Top-level posts other than questions also carry their type as a tag.
        if self.is_toplevel and self.type != Post.QUESTION:
            type_tag = self.get_type_display()
            if type_tag not in self.tag_val:
                self.tag_val = self.tag_val + "," + type_tag

        is_new = not self.id
        if is_new:

            # A reply without a title of its own borrows the parent's title.
            if self.parent and not self.title:
                self.title = self.parent.title

            # Anything attached to an answer or a comment must be a comment.
            if self.parent and self.parent.type in (Post.ANSWER, Post.COMMENT):
                self.type = Post.COMMENT

            # Pick a default type when none was supplied.
            if self.type is None:
                if self.parent:
                    self.type = self.COMMENT
                else:
                    self.type = self.FORUM

            # The rest of this branch runs only once, upon object creation.
            # NOTE: a reply always ends up with its parent's title here, which
            # makes the title fallback above redundant.
            if self.parent:
                self.title = self.parent.title
            self.lastedit_user = self.author
            if not self.status:
                self.status = Post.PENDING
            if not self.creation_date:
                self.creation_date = now()
            self.lastedit_date = self.creation_date

            # A new answer bumps the last-edit stamps on its parent.
            if self.type == Post.ANSWER:
                parent = self.parent
                parent.lastedit_date = self.lastedit_date
                parent.lastedit_user = self.lastedit_user
                parent.save()

        # Recompute post reply count
        self.update_reply_count()

        super(Post, self).save(*args, **kwargs)