Example #1
    def handle(self, *args, **options):
        now = utc_now()
        verbose = int(options['verbosity']) > 1

        qs = BlogItemHits.objects.filter(hits__gt=0)
        for hit in qs.values('oid', 'hits'):
            # This is totally arbitrary!
            # I'm using hits and number of comments as a measure of
            # how it should be ranked.
            # The thinking is that posts that are found and read are
            # likely to be more popular and should thus be ranked
            # higher.
            plogrank = hit['hits']
            comments = (
                BlogComment.objects
                .filter(blogitem__oid=hit['oid']).count()
            )
            # multiply by a factor to make comments slightly more significant
            plogrank += comments * 10
            (
                BlogItem.objects
                .filter(oid=hit['oid'])
                .update(plogrank=plogrank)
            )
            if verbose:
                print(str(plogrank).rjust(7), '\t', hit['oid'])
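
Every example on this page calls utc_now() without showing its definition. A minimal sketch of what such a helper typically looks like (an assumption; the actual project may define it differently, e.g. as a thin wrapper around django.utils.timezone.now()):

import datetime

def utc_now():
    # Assumed helper: a timezone-aware "now" in UTC.
    return datetime.datetime.now(datetime.timezone.utc)
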
Example #2
    def test_blog_post_caching(self):
        blog = BlogItem.objects.create(
            oid="some-longish-test-post",
            title="TITLEX",
            text="BLABLABLA",
            display_format="structuredtext",
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse("blog_post", args=[blog.oid])

        import peterbecom.plog.views

        old_render = peterbecom.plog.views.render
        from django.shortcuts import render as django_render

        render_counts = []

        def mocked_render(*a, **k):
            render_counts.append(1)
            return django_render(*a, **k)

        peterbecom.plog.views.render = mocked_render
        try:
            response = self.client.get(url)
            content = response.content.decode("utf-8")
            assert blog.title in content
            assert "0 comments" in content
            response = self.client.get(url)
            content = response.content.decode("utf-8")
            assert "0 comments" in content

            BlogComment.objects.create(
                comment="textext",
                blogitem=blog,
                approved=True,
                add_date=utc_now() + datetime.timedelta(seconds=1),
            )
            response = self.client.get(url)
            content = response.content.decode("utf-8")
            assert "1 comment" in content
        finally:
            peterbecom.plog.views.render = old_render
Example #3
    def test_blog_post_caching(self):
        blog = BlogItem.objects.create(
            oid='some-longish-test-post',
            title='TITLEX',
            text='BLABLABLA',
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse('blog_post', args=[blog.oid])

        import peterbecom.plog.views
        old_render = peterbecom.plog.views.render
        from django.shortcuts import render as django_render
        render_counts = []

        def mocked_render(*a, **k):
            render_counts.append(1)
            return django_render(*a, **k)

        peterbecom.plog.views.render = mocked_render
        try:
            response = self.client.get(url)
            self.assertTrue(blog.title in response.content)
            assert '0 comments' in response.content
            response = self.client.get(url)
            assert '0 comments' in response.content

            BlogComment.objects.create(
                comment="textext",
                blogitem=blog,
                approved=True,
                add_date=utc_now() + datetime.timedelta(seconds=1),
            )
            response = self.client.get(url)
            assert '1 comment' in response.content
        finally:
            peterbecom.plog.views.render = old_render

        assert len(render_counts) == 2, render_counts
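
Examples #2 and #3 monkeypatch peterbecom.plog.views.render by hand, restoring it in a finally block, so render_counts reveals how many requests actually rendered a template rather than being served from cache. A sketch of the same idea inside the test using unittest.mock.patch (module path taken from the examples; behavior assumed to match the final assertion above):

from unittest import mock
from django.shortcuts import render as django_render

with mock.patch(
    "peterbecom.plog.views.render", side_effect=django_render
) as mocked_render:
    self.client.get(url)
    self.client.get(url)  # assuming this second hit is served from cache
    assert mocked_render.call_count == 1
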
Example #4
    def test_old_redirects(self):
        blog = BlogItem.objects.create(
            oid='myoid',
            title='TITLEX',
            text="""
            ttest test
            """,
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse('blog_post', args=[blog.oid])

        response = self.client.get(url)
        assert response.status_code == 200

        response = self.client.get(url, {'replypath': 'foo'})
        self.assertEqual(response.status_code, 301)
        self.assertEqual(urlparse(response['location']).path, url)
        self.assertTrue(not urlparse(response['location']).query)
Example #5
def get_data(max_length=1000, pub_date_format=None, offset=0):
    items = []
    category_names = dict((x.id, x.name) for x in Category.objects.all())
    categories = defaultdict(list)
    for e in BlogItem.categories.through.objects.all():
        categories[e.blogitem_id].append(category_names[e.category_id])
    qs = BlogItem.objects.filter(pub_date__lt=utc_now()).order_by("-pub_date")
    for item in qs[offset:max_length]:
        pub_date = item.pub_date
        if pub_date_format:
            pub_date = pub_date_format(pub_date)
        items.append({
            "title": item.title,
            "slug": item.oid,
            "pub_date": pub_date,
            "keywords": [x for x in item.proper_keywords if x][:3],
            "categories": categories[item.id][:3],
        })
    return items
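
One subtlety: the slice qs[offset:max_length] treats max_length as an absolute end index, not a page size, so get_data(max_length=1000, offset=100) yields at most 900 items. If a count-per-call were intended, the slice would read (hypothetical variant):

    for item in qs[offset:offset + max_length]:
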
Example #6
    def test_old_redirects(self):
        blog = BlogItem.objects.create(
            oid="myoid",
            title="TITLEX",
            text="""
            ttest test
            """,
            display_format="structuredtext",
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse("blog_post", args=[blog.oid])

        response = self.client.get(url)
        assert response.status_code == 200

        response = self.client.get(url, {"replypath": "foo"})
        assert response.status_code == 301
        assert urlparse(response["location"]).path == url
        assert not urlparse(response["location"]).query
Example #7
    def test_blog_post_ping(self):
        blog = BlogItem.objects.create(
            oid="myoid",
            title="TITLEX",
            text="""
            ttest test
            """,
            display_format="structuredtext",
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse("blog_post_ping", args=[blog.oid])
        response = self.client.get(url)
        assert response.status_code == 405
        response = self.client.put(url)
        assert response.status_code == 200
        assert response.json()["ok"]

        hit, = BlogItemHit.objects.all()
        assert hit.blogitem == blog
Example #8
def get_data(max_length=1000, pub_date_format=None, offset=0):
    items = []
    category_names = dict((x.id, x.name) for x in Category.objects.all())
    categories = defaultdict(list)
    for e in BlogItem.categories.through.objects.all():
        categories[e.blogitem_id].append(
            category_names[e.category_id]
        )
    qs = BlogItem.objects.filter(pub_date__lt=utc_now()).order_by('-pub_date')
    for item in qs[offset:max_length]:
        pub_date = item.pub_date
        if pub_date_format:
            pub_date = pub_date_format(pub_date)
        items.append({
            'title': item.title,
            'slug': item.oid,
            'pub_date': pub_date,
            'keywords': [x for x in item.proper_keywords if x][:3],
            'categories': categories[item.id][:3],
        })
    return items
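
Examples #5 and #8 build the id-to-category-names map with a single query over the many-to-many through table, avoiding one categories query per post. A roughly equivalent sketch using prefetch_related against the same models (two queries total, no manual bookkeeping):

qs = (
    BlogItem.objects.filter(pub_date__lt=utc_now())
    .order_by('-pub_date')
    .prefetch_related('categories')
)
for item in qs[offset:max_length]:
    # categories.all() is served from the prefetch cache, not a new query
    names = [c.name for c in item.categories.all()][:3]
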
Example #9
def home(request, oc=None):
    context = {}
    qs = BlogItem.objects.filter(pub_date__lt=utc_now())
    if oc is not None:
        if not oc:  # empty string
            return redirect('/', permanent=True)
        categories = parse_ocs_to_categories(oc)
        cat_q = make_categories_q(categories)
        qs = qs.filter(cat_q)
        context['categories'] = categories

    # Reasons for not being here
    if request.method == 'HEAD':
        return http.HttpResponse('')

    BATCH_SIZE = 10
    try:
        page = max(1, int(request.GET.get('page', 1))) - 1
    except ValueError:
        raise http.Http404('invalid page value')
    n, m = page * BATCH_SIZE, (page + 1) * BATCH_SIZE
    max_count = qs.count()
    first_post, = qs.order_by('-pub_date')[:1]
    context['first_post_url'] = request.build_absolute_uri(
        reverse('blog_post', args=[first_post.oid])
    )
    if (page + 1) * BATCH_SIZE < max_count:
        context['next_page'] = page + 2
    context['previous_page'] = page

    context['blogitems'] = (
        qs
        .prefetch_related('categories')
        .order_by('-pub_date')
    )[n:m]

    if page > 0:  # page starts on 0
        context['page_title'] = 'Page {}'.format(page + 1)

    return render(request, 'homepage/home.html', context)
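
The pagination arithmetic is easy to misread because page is one-based in the query string but zero-based internally. Worked through for BATCH_SIZE = 10:

# ?page=2  ->  page = max(1, 2) - 1 = 1   (zero-based)
# n, m = 1 * 10, 2 * 10                   ->  qs[10:20]
# next_page = page + 2 = 3                (one-based again, for the template)
# previous_page = page = 1
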
Example #10
    def test_blog_post_with_comment_approval(self):
        blog = BlogItem.objects.create(
            oid='some-longish-test-post',
            title='TITLEX',
            text='BLABLABLA',
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse('blog_post', args=[blog.oid])

        self._login()
        loggedin = self.client
        anonymous = Client()
        assert len(loggedin.cookies)
        assert not len(anonymous.cookies)

        comment = BlogComment.objects.create(
            oid='a1000',
            blogitem=blog,
            comment='COMMENTX',
            name='Mr Anonymous',
        )
        # but it hasn't been approved yet
        response = anonymous.get(url)
        self.assertEqual(response.status_code, 200)
        self.assertTrue('COMMENTX' not in response.content)

        # let's approve it!
        approve_url = reverse('approve_comment', args=[blog.oid, comment.oid])
        response = loggedin.post(
            approve_url,
            HTTP_X_REQUESTED_WITH='XMLHttpRequest'
        )
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, 'OK')

        response = anonymous.get(url)
        self.assertEqual(response.status_code, 200)
        self.assertTrue('COMMENTX' in response.content)
Example #11
    def handle(self, *args, **options):
        if cache.get('nodomains-queued'):
            return
        queued_qs = models.Queued.objects.filter(failed_attempts__lt=5)
        for queued in queued_qs.order_by('add_date'):
            cache.set('nodomains-queued', True, 100)
            try:
                then = utc_now() - datetime.timedelta(days=1)
                models.Result.objects.get(
                    url=queued.url,
                    add_date__gt=then
                )
                print("Skipping", queued.url)
            except models.Result.DoesNotExist:
                print(queued.url)
                try:
                    run_url(queued.url)
                except Exception:
                    queued.failed_attempts += 1
                    queued.save()
                    continue
            queued.delete()
        cache.delete('nodomains-queued')
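
The 'nodomains-queued' cache key acts as a crude lock: it is refreshed with a 100-second TTL on every iteration and deleted at the end. Two workers can still race between cache.get() and cache.set(); a slightly tighter variant (a sketch, not what the example does) would use cache.add(), which only writes the key when it is absent:

        # cache.add() returns False if the key already exists, so the
        # check-and-set becomes a single cache operation.
        if not cache.add('nodomains-queued', True, 100):
            return
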
Example #12
    def test_text_rendering_with_images(self):
        blog = BlogItem.objects.create(
            oid='myoid',
            title='TITLEX',
            text="""
            "image.png":/plog/myoid/image.png
            and *this*
            """,
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        url = reverse('blog_post', args=[blog.oid])
        response = self.client.get(url)
        content = response.content
        self.assertTrue('<em>this</em>' in content)
        regex_str = (
            r'/CONTENTCACHE-\d+%s' % (re.escape('/plog/myoid/image.png'),)
        )
        self.assertTrue(re.findall(regex_str, content))

        old = settings.STATIC_URL
        settings.STATIC_URL = '//some.cdn.com/'
        try:
            blog.text_rendered = ''
            blog.save()
            response = self.client.get(url)
            content = response.content
            regex_str = (
                r'%sCONTENTCACHE-\d+%s' % (
                    settings.STATIC_URL,
                    re.escape('/plog/myoid/image.png')
                )
            )
            self.assertTrue(re.findall(regex_str, content))
        finally:
            settings.STATIC_URL = old
Example #13
def timesince(date):
    if date.tzinfo:
        return smartertimesince(date, utc_now())
    else:
        return smartertimesince(date)
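
The tzinfo check matters because Python refuses to subtract a naive datetime from an aware one. A brief illustration (smartertimesince is an external helper; its signature is assumed from the call sites above):

import datetime

timesince(utc_now() - datetime.timedelta(hours=3))  # aware: compared against utc_now()
timesince(datetime.datetime(2020, 1, 1))            # naive: library's own reference
# Mixing naive and aware directly would raise:
# TypeError: can't subtract offset-naive and offset-aware datetimes
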
Example #14
    def items(self, categories):
        qs = (
            BlogItem.objects
            .filter(pub_date__lt=utc_now())
        )
        if categories:
            qs = qs.filter(make_categories_q(categories))
        return qs.order_by('-pub_date')[:10]
Example #15
def sitemap(request):
    base_url = "https://%s" % RequestSite(request).domain

    urls = []
    urls.append('<?xml version="1.0" encoding="iso-8859-1"?>')
    urls.append('<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">')

    def add(loc, lastmod=None, changefreq="monthly", priority=None):
        url = "<url><loc>%s%s</loc>" % (base_url, loc)
        if lastmod:
            url += "<lastmod>%s</lastmod>" % lastmod.strftime("%Y-%m-%d")
        if priority:
            url += "<priority>%s</priority>" % priority
        if changefreq:
            url += "<changefreq>%s</changefreq>" % changefreq
        url += "</url>"
        urls.append(url)

    now = utc_now()
    latest_blogitem, = BlogItem.objects.filter(
        pub_date__lt=now).order_by("-pub_date")[:1]
    add("/",
        priority=1.0,
        changefreq="daily",
        lastmod=latest_blogitem.pub_date)
    add(reverse("about"), changefreq="weekly", priority=0.5)
    add(reverse("contact"), changefreq="weekly", priority=0.5)

    # TODO: Instead of looping over BlogItem, loop over
    # BlogItemTotalHits and use the join to build this list.
    # Then we can sort by a scoring function.
    # This will only work once ALL blogitems have at least 1 hit.
    blogitems = BlogItem.objects.filter(pub_date__lt=now)
    for blogitem in blogitems.order_by("-pub_date"):
        if not blogitem.modify_date:
            # legacy!
            try:
                latest_comment, = BlogComment.objects.filter(
                    approved=True, blogitem=blogitem).order_by("-add_date")[:1]
                blogitem.modify_date = latest_comment.add_date
            except ValueError:
                blogitem.modify_date = blogitem.pub_date
            blogitem._modify_date_set = True
            blogitem.save()

        age = (now - blogitem.modify_date).days
        if age < 14:
            changefreq = "daily"
        elif age < 60:
            changefreq = "weekly"
        elif age < 100:
            changefreq = "monthly"
        else:
            changefreq = None
        add(
            reverse("blog_post", args=[blogitem.oid]),
            lastmod=blogitem.modify_date,
            changefreq=changefreq,
        )

    urls.append("</urlset>")
    return http.HttpResponse("\n".join(urls), content_type="text/xml")
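
The latest_blogitem, = ...[:1] pattern unpacks a one-element queryset and raises ValueError when it is empty; that is also why the legacy branch above catches ValueError rather than DoesNotExist around latest_comment:

# Unpacking an empty sliced queryset:
latest_comment, = BlogComment.objects.none()[:1]
# ValueError: not enough values to unpack (expected 1, got 0)
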
Example #16
def home(request, oc=None, page=1):
    context = {}
    qs = BlogItem.objects.filter(pub_date__lt=utc_now())
    if oc is not None:
        if not oc:  # empty string
            return redirect("/", permanent=True)
        categories = parse_ocs_to_categories(oc, strict_matching=True)
        cat_q = make_categories_q(categories)
        qs = qs.filter(cat_q)
        context["categories"] = categories

    # Reasons for not being here
    if request.method == "HEAD":
        return http.HttpResponse("")

    batch_size = settings.HOMEPAGE_BATCH_SIZE

    try:
        page = max(1, int(page)) - 1
    except ValueError:
        raise http.Http404("invalid page value")
    n, m = page * batch_size, (page + 1) * batch_size
    max_count = qs.count()
    if page * batch_size > max_count:
        return http.HttpResponse("Too far back in time\n", status=404)
    if (page + 1) * batch_size < max_count:
        context["next_page"] = page + 2
    context["previous_page"] = page

    # If you're going deep into the pagination with some really old
    # pages, it's not worth using the fs cache because if you have to
    # store a fs cache version for every single page from p5 to p55
    # it's too likely to get stale and old and it's too much work
    # on the mincss postprocess.
    if page > 6 or (context.get("categories") and page > 2):
        request._fscache_disable = True

    if context.get("categories"):
        oc_path = "/".join(
            ["oc-{}".format(c.name) for c in context["categories"]])
        oc_path = oc_path[3:]

    if context.get("next_page"):
        if context.get("categories"):
            next_page_url = reverse("only_category_paged",
                                    args=(oc_path, context["next_page"]))
        else:
            next_page_url = reverse("home_paged",
                                    args=(context["next_page"], ))
        context["next_page_url"] = next_page_url

    if context["previous_page"] > 1:
        if context.get("categories"):
            previous_page_url = reverse("only_category_paged",
                                        args=(oc_path,
                                              context["previous_page"]))
        else:
            previous_page_url = reverse("home_paged",
                                        args=(context["previous_page"], ))
        context["previous_page_url"] = previous_page_url
    elif context["previous_page"]:  # i.e. == 1
        if context.get("categories"):
            previous_page_url = reverse("only_category", args=(oc_path, ))
        else:
            previous_page_url = "/"
        context["previous_page_url"] = previous_page_url

    context["blogitems"] = (
        qs.prefetch_related("categories").order_by("-pub_date"))[n:m]

    if page > 0:  # page starts on 0
        context["page_title"] = "Page {}".format(page + 1)

    approved_comments_count = {}
    blog_comments_count_qs = (BlogComment.objects.filter(
        blogitem__in=context["blogitems"],
        approved=True).values("blogitem_id").annotate(
            count=Count("blogitem_id")))
    for count in blog_comments_count_qs:
        approved_comments_count[count["blogitem_id"]] = count["count"]
    context["approved_comments_count"] = approved_comments_count

    return render(request, "homepage/home.html", context)
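
The oc_path construction is subtle: every category name is prefixed with "oc-", then the first three characters are sliced off, so only the first prefix is dropped (presumably because the URL pattern consuming oc_path adds it back). Worked through for two categories:

# ["oc-Python", "oc-Django"]  ->  "oc-Python/oc-Django"
# oc_path[3:]                 ->  "Python/oc-Django"
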
Example #17
    def test_homepage_cache_rendering(self):
        url = reverse("home")

        blog1 = BlogItem.objects.create(
            title="TITLE1",
            text="BLABLABLA",
            display_format="structuredtext",
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        BlogComment.objects.create(oid="c1",
                                   comment="textext",
                                   blogitem=blog1,
                                   approved=True)

        BlogComment.objects.create(oid="c2",
                                   comment="tuxtuxt",
                                   blogitem=blog1,
                                   approved=True)

        response = self.client.get(url)
        content = response.content.decode("utf-8")
        self.assertTrue("TITLE1" in content)
        self.assertTrue("2 comments" in content)

        blog1.title = "TUTLE1"
        blog1.save()
        response = self.client.get(url)
        content = response.content.decode("utf-8")
        self.assertTrue("TUTLE1" in content)

        blog2 = BlogItem.objects.create(
            oid="t2",
            title="TATLE2",
            text="BLEBLE",
            display_format="structuredtext",
            pub_date=utc_now() - datetime.timedelta(seconds=1),
        )

        response = self.client.get(url)
        content = response.content.decode("utf-8")
        self.assertTrue("TATLE2" in content)
        self.assertTrue("0 comments" in content)
        self.assertTrue("TUTLE1" in content)
        self.assertTrue("2 comments" in content)

        # by categories only
        cat1 = Category.objects.create(name="CATEGORY1")
        cat2 = Category.objects.create(name="CATEGORY2")
        blog1.categories.add(cat1)
        blog1.save()
        blog2.categories.add(cat2)
        blog2.save()

        response = self.client.get(url)
        content = response.content.decode("utf-8")
        self.assertTrue("CATEGORY1" in content)
        self.assertTrue("CATEGORY2" in content)

        url = reverse("only_category", args=["CATEGORY2"])
        response = self.client.get(url)
        content = response.content.decode("utf-8")
        self.assertTrue("CATEGORY1" not in content)
        self.assertTrue("CATEGORY2" in content)

        url = reverse("only_category", args=["CATEGORY1"])
        response = self.client.get(url)
        content = response.content.decode("utf-8")
        self.assertTrue("CATEGORY1" in content)
        self.assertTrue("CATEGORY2" not in content)

        for i in range(2, 21):
            BlogItem.objects.create(
                oid="t-%s" % i,
                title="TITLE-%s" % i,
                text="BLEBLE",
                display_format="structuredtext",
                pub_date=utc_now() - datetime.timedelta(seconds=20 + i),
            )

        url = reverse("home")
        response = self.client.get(url)
        content = response.content.decode("utf-8")
        assert "/p2" in content
        visible_titles = []
        not_visible_titles = []
        for item in BlogItem.objects.all():
            if item.title in content:
                visible_titles.append(item.title)
            else:
                not_visible_titles.append(item.title)

        url = reverse("home_paged", args=(2, ))
        response = self.client.get(url)
        content = response.content.decode("utf-8")
        batch_size = settings.HOMEPAGE_BATCH_SIZE
        for each in visible_titles[:batch_size]:
            assert each not in content
        for each in not_visible_titles[:batch_size]:
            assert each in content
        assert "/p3" in content
Example #18
    def test_homepage_cache_rendering(self):
        url = reverse('home')

        blog1 = BlogItem.objects.create(
            title='TITLE1',
            text='BLABLABLA',
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=10),
        )
        BlogComment.objects.create(
            oid='c1',
            comment="textext",
            blogitem=blog1,
            approved=True,
        )

        BlogComment.objects.create(
            oid='c2',
            comment="tuxtuxt",
            blogitem=blog1,
            approved=True,
        )

        response = self.client.get(url)
        self.assertTrue('TITLE1' in response.content)
        self.assertTrue('2 comments' in response.content)

        blog1.title = 'TUTLE1'
        blog1.save()
        response = self.client.get(url)
        self.assertTrue('TUTLE1' in response.content)

        blog2 = BlogItem.objects.create(
            oid='t2',
            title='TATLE2',
            text='BLEBLE',
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=1),
        )

        response = self.client.get(url)
        self.assertTrue('TATLE2' in response.content)
        self.assertTrue('0 comments' in response.content)
        self.assertTrue('TUTLE1' in response.content)
        self.assertTrue('2 comments' in response.content)

        # by categories only
        cat1 = Category.objects.create(
            name='CATEGORY1',
        )
        cat2 = Category.objects.create(
            name='CATEGORY2',
        )
        blog1.categories.add(cat1)
        blog1.save()
        blog2.categories.add(cat2)
        blog2.save()

        response = self.client.get(url)
        self.assertTrue('CATEGORY1' in response.content)
        self.assertTrue('CATEGORY2' in response.content)

        url = reverse('only_category', args=['CATEGORY2'])
        response = self.client.get(url)
        self.assertTrue('CATEGORY1' not in response.content)
        self.assertTrue('CATEGORY2' in response.content)

        url = reverse('only_category', args=['CATEGORY1'])
        response = self.client.get(url)
        self.assertTrue('CATEGORY1' in response.content)
        self.assertTrue('CATEGORY2' not in response.content)

        for i in range(2, 21):
            BlogItem.objects.create(
                oid='t-%s' % i,
                title='TITLE-%s' % i,
                text='BLEBLE',
                display_format='structuredtext',
                pub_date=utc_now() - datetime.timedelta(seconds=20 + i),
            )

        url = reverse('home')
        response = self.client.get(url)
        assert '?page=2' in response.content
        visible_titles = []
        not_visible_titles = []
        for item in BlogItem.objects.all():
            if item.title in response.content:
                visible_titles.append(item.title)
            else:
                not_visible_titles.append(item.title)

        response = self.client.get(url, {'page': 2})
        for each in visible_titles[:10]:
            assert each not in response.content
        for each in not_visible_titles[:10]:
            assert each in response.content
        assert '?page=1' in response.content
        assert '?page=3' in response.content
Example #19
def sitemap(request):
    base_url = 'https://%s' % RequestSite(request).domain

    urls = []
    urls.append('<?xml version="1.0" encoding="iso-8859-1"?>')
    urls.append('<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">')

    def add(loc, lastmod=None, changefreq='monthly', priority=None):
        url = '<url><loc>%s%s</loc>' % (base_url, loc)
        if lastmod:
            url += '<lastmod>%s</lastmod>' % lastmod.strftime('%Y-%m-%d')
        if priority:
            url += '<priority>%s</priority>' % priority
        if changefreq:
            url += '<changefreq>%s</changefreq>' % changefreq
        url += '</url>'
        urls.append(url)

    now = utc_now()
    latest_blogitem, = (
        BlogItem.objects
        .filter(pub_date__lt=now)
        .order_by('-pub_date')[:1]
    )
    add(
        '/',
        priority=1.0,
        changefreq='daily',
        lastmod=latest_blogitem.pub_date
    )
    add(reverse('about'), changefreq='weekly', priority=0.5)
    add(reverse('contact'), changefreq='weekly', priority=0.5)

    for blogitem in (BlogItem.objects
                     .filter(pub_date__lt=now)
                     .order_by('-pub_date')[:1000]):
        if not blogitem.modify_date:
            # legacy!
            try:
                latest_comment, = (
                    BlogComment.objects
                    .filter(approved=True, blogitem=blogitem)
                    .order_by('-add_date')[:1]
                )
                blogitem.modify_date = latest_comment.add_date
            except ValueError:
                blogitem.modify_date = blogitem.pub_date
            blogitem._modify_date_set = True
            blogitem.save()

        age = (now - blogitem.modify_date).days
        if age < 14:
            changefreq = 'daily'
        elif age < 60:
            changefreq = 'weekly'
        elif age < 100:
            changefreq = 'monthly'
        else:
            changefreq = None
        add(reverse('blog_post', args=[blogitem.oid]),
            lastmod=blogitem.modify_date,
            changefreq=changefreq
            )

    urls.append('</urlset>')
    return http.HttpResponse('\n'.join(urls), content_type="text/xml")
Example #20
def home(request, oc=None, page=1):
    context = {}
    qs = BlogItem.objects.filter(pub_date__lt=utc_now())
    if oc is not None:
        if not oc:  # empty string
            return redirect("/", permanent=True)
        categories = parse_ocs_to_categories(oc, strict_matching=True)
        cat_q = make_categories_q(categories)
        qs = qs.filter(cat_q)
        context["categories"] = categories

    # Reasons for not being here
    if request.method == "HEAD":
        return http.HttpResponse("")

    batch_size = settings.HOMEPAGE_BATCH_SIZE

    try:
        page = max(1, int(page)) - 1
    except ValueError:
        raise http.Http404("invalid page value")
    n, m = page * batch_size, (page + 1) * batch_size
    max_count = qs.count()
    if page * batch_size > max_count:
        return http.HttpResponse("Too far back in time\n", status=404)
    if (page + 1) * batch_size < max_count:
        context["next_page"] = page + 2
    context["previous_page"] = page

    # If you're going deep into the pagination with some really old
    # pages, it's not worth using the fs cache because if you have to
    # store a fs cache version for every single page from p5 to p55
    # it's too likely to get stale and old and it's too much work
    # on the mincss postprocess.
    if page > 6 or (context.get("categories") and page > 2):
        request._fscache_disable = True

    if context.get("categories"):
        oc_path = "/".join(["oc-{}".format(c.name) for c in context["categories"]])
        oc_path = oc_path[3:]

    if context.get("next_page"):
        if context.get("categories"):
            next_page_url = reverse(
                "only_category_paged", args=(oc_path, context["next_page"])
            )
        else:
            next_page_url = reverse("home_paged", args=(context["next_page"],))
        context["next_page_url"] = next_page_url

    if context["previous_page"] > 1:
        if context.get("categories"):
            previous_page_url = reverse(
                "only_category_paged", args=(oc_path, context["previous_page"])
            )
        else:
            previous_page_url = reverse("home_paged", args=(context["previous_page"],))
        context["previous_page_url"] = previous_page_url
    elif context["previous_page"]:  # i.e. == 1
        if context.get("categories"):
            previous_page_url = reverse("only_category", args=(oc_path,))
        else:
            previous_page_url = "/"
        context["previous_page_url"] = previous_page_url

    context["blogitems"] = (qs.prefetch_related("categories").order_by("-pub_date"))[
        n:m
    ]

    if page > 0:  # page starts on 0
        context["page_title"] = "Page {}".format(page + 1)

    approved_comments_count = {}
    blog_comments_count_qs = (
        BlogComment.objects.filter(blogitem__in=context["blogitems"], approved=True)
        .values("blogitem_id")
        .annotate(count=Count("blogitem_id"))
    )
    for count in blog_comments_count_qs:
        approved_comments_count[count["blogitem_id"]] = count["count"]
    context["approved_comments_count"] = approved_comments_count

    return render(request, "homepage/home.html", context)
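
The approved_comments_count loop at the end is a plain aggregation; the same mapping can be built in a single expression over the same queryset (a sketch):

approved_comments_count = {
    row["blogitem_id"]: row["count"]
    for row in BlogComment.objects.filter(
        blogitem__in=context["blogitems"], approved=True
    ).values("blogitem_id").annotate(count=Count("blogitem_id"))
}
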
Example #21
def search(request):
    data = {}
    search = request.GET.get('q', '')
    if len(search) > 90:
        return http.HttpResponse("Search too long")
    documents = []
    data['base_url'] = 'https://%s' % RequestSite(request).domain
    tag_strip = re.compile('<[^>]+>')

    def append_match(item, words):

        text = item.rendered
        text = tag_strip.sub(' ', text)

        sentences = []

        def matcher(match):
            return '<b>%s</b>' % match.group()

        if regex:
            for each in regex.finditer(text):
                sentence = text[max(each.start() - 35, 0): each.end() + 40]
                sentence = regex_ext.sub(matcher, sentence)
                sentence = sentence.strip()
                if each.start() > 0 and not sentence[0].isupper():
                    sentence = '...%s' % sentence
                if each.end() < len(text):
                    sentence = '%s...' % sentence
                sentences.append(sentence.strip())
                if len(sentences) > 3:
                    break

        if isinstance(item, BlogItem):
            title = html_escape(item.title)
            if regex_ext:
                title = regex_ext.sub(matcher, title)
            date = item.pub_date
            type_ = 'blog'
        else:
            if not item.blogitem:
                item.correct_blogitem_parent()
            title = (
                "Comment on <em>%s</em>" % html_escape(item.blogitem.title)
            )
            date = item.add_date
            type_ = 'comment'

        documents.append({
            'title': title,
            'summary': '<br>'.join(sentences),
            'date': date,
            'url': item.get_absolute_url(),
            'type': type_,
        })

    def create_search(s):
        words = re.findall(r'\w+', s)
        words_orig = words[:]

        if 'or' in words:
            which = words.index('or')
            words_orig.remove('or')
            if (which + 1) < len(words) and which > 0:
                before = words.pop(which - 1)
                words.pop(which - 1)
                after = words.pop(which - 1)
                words.insert(which - 1, '%s | %s' % (before, after))
        while 'and' in words_orig:
            words_orig.remove('and')
        while 'and' in words:
            words.remove('and')

        escaped = ' & '.join(words)
        return escaped, words_orig

    data['q'] = search

    keyword_search = {}
    if len(search) > 1:
        _keyword_keys = ('keyword', 'keywords', 'category', 'categories')
        search, keyword_search = split_search(search, _keyword_keys)

    not_ids = defaultdict(set)
    times = []
    search_times = []
    count_documents = []
    regex = regex_ext = None

    def append_queryset_search(items, order_by, words, model_name):
        count = items.count()
        count_documents.append(count)
        for item in items.order_by(order_by)[:20]:
            append_match(item, words)
            not_ids[model_name].add(item.pk)
        return count

    now = utc_now()

    if len(search) > 1:
        search_escaped, words = create_search(search)
        regex = re.compile(
            r'\b(%s)' % '|'.join(
                re.escape(word)
                for word in words
                if word.lower() not in STOPWORDS
            ),
            re.I | re.U
        )
        regex_ext = re.compile(
            r'\b(%s\w*)\b' % '|'.join(
                re.escape(word)
                for word in words
                if word.lower() not in STOPWORDS
            ),
            re.I | re.U
        )

        for model in (BlogItem, BlogComment):
            qs = model.objects
            model_name = model._meta.object_name
            if model == BlogItem:
                qs = qs.filter(pub_date__lte=now)
                fields = ('title', 'text')
                order_by = '-pub_date'
                if keyword_search.get('keyword'):
                    qs = qs.filter(
                        proper_keywords__contains=[keyword_search['keyword']]
                    )
                if keyword_search.get('keywords'):
                    keywords = keyword_search['keywords']
                    keywords = [
                        x.strip() for x in keywords.split(
                            ',' in keywords and ',' or None
                        )
                        if x.strip()
                    ]
                    qs = qs.filter(
                        proper_keywords__overlap=keywords
                    )
            elif model == BlogComment:
                fields = ('comment',)
                order_by = '-add_date'
                _specials = ('keyword', 'keywords', 'category', 'categories')
                if any(keyword_search.get(k) for k in _specials):
                    # BlogComments don't have this keyword so it can
                    # never match
                    continue

            for field in fields:
                if not_ids[model_name]:
                    qs = qs.exclude(pk__in=not_ids[model_name])
                _sql = "to_tsvector('english'," + field + ") "
                if ' | ' in search_escaped or ' & ' in search_escaped:
                    _sql += "@@ to_tsquery('english', %s)"
                else:
                    _sql += "@@ plainto_tsquery('english', %s)"
                items = qs.extra(where=[_sql], params=[search_escaped])

                t0 = time.time()
                count = append_queryset_search(
                    items, order_by, words, model_name
                )
                t1 = time.time()
                times.append('%s to find %s %ss by field %s' % (
                    t1 - t0,
                    count,
                    model_name,
                    field
                ))
                search_times.append(t1-t0)

        logger.info('Searching for %r:\n%s' % (search, '\n'.join(times)))
    elif keyword_search and any(keyword_search.values()):
        t0 = time.time()

        if keyword_search.get('keyword') or keyword_search.get('keywords'):
            if keyword_search.get('keyword'):
                assert isinstance(keyword_search['keyword'], basestring)
                items = BlogItem.objects.filter(
                    pub_date__lt=timezone.now(),
                    proper_keywords__contains=[keyword_search['keyword']]
                ).order_by('-pub_date')
            elif keyword_search.get('keywords'):
                keywords = keyword_search['keywords']
                keywords = [
                    x.strip() for x in keywords.split(
                        ',' in keywords and ',' or None
                    )
                    if x.strip()
                ]
                items = BlogItem.objects.filter(
                    pub_date__lt=timezone.now(),
                    proper_keywords__overlap=keywords
                ).order_by('-pub_date')

            model_name = BlogItem._meta.object_name
            append_queryset_search(items, '-pub_date', [], model_name)

        if keyword_search.get('category') or keyword_search.get('categories'):
            if keyword_search.get('category'):
                categories = Category.objects.filter(
                    name=keyword_search.get('category')
                )
            else:
                cats = [x.strip() for x
                        in keyword_search.get('categories').split(',')
                        if x.strip()]
                categories = Category.objects.filter(name__in=cats)
            if categories:
                cat_q = make_categories_q(categories)
                items = BlogItem.objects.filter(cat_q)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        t1 = time.time()
        search_times.append(t1 - t0)

    data['search_time'] = sum(search_times)
    count_documents_shown = len(documents)
    data['documents'] = documents
    data['count_documents'] = sum(count_documents)
    data['count_documents_shown'] = count_documents_shown
    data['better'] = None
    if not data['count_documents']:
        _qterms = len(data['q'].split())
        if ' or ' not in data['q'] and _qterms > 1 and _qterms < 5:
            data['better'] = data['q'].replace(' ', ' or ')
    if data['better']:
        data['better_url'] = (
            reverse('search') + '?' +
            urllib.urlencode({'q': data['better'].encode('utf-8')})
        )

    if not data['q']:
        page_title = 'Search'
    elif data['count_documents'] == 1:
        page_title = '1 thing found'
    else:
        page_title = '%s things found' % data['count_documents']
    if count_documents_shown < data['count_documents']:
        if count_documents_shown == 1:
            page_title += ' (but only 1 thing shown)'
        else:
            page_title += ' (but only %s things shown)' % count_documents_shown
    data['page_title'] = page_title
    if (
        not data['count_documents'] and
        len(search.split()) == 1 and not keyword_search
    ):
        if BlogItem.objects.filter(
            proper_keywords__overlap=[search],
            pub_date__lt=timezone.now()
        ):
            url = reverse('search')
            url += '?' + urllib.urlencode({'q': 'keyword:%s' % search})
            return redirect(url)

    return render(request, 'homepage/search.html', data)
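
create_search() translates a free-text query into a PostgreSQL tsquery string: whitespace implies AND (" & ") and a single infix "or" becomes " | ", which is why the SQL above switches between to_tsquery and plainto_tsquery. Worked through as the code defines it:

# create_search("python django")     -> ("python & django", ["python", "django"])
# create_search("python or django")  -> ("python | django", ["python", "django"])
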