Ejemplo n.º 1
0
def redis_increment(prefix, request):
    """Count one request in the Redis sorted set named ``prefix``.

    The member that gets incremented is ``"<METHOD> <path>"``.  A trailing
    slash is dropped from every path except the root, so both spellings of
    a URL share a single counter.
    """
    connection = get_redis_connection()
    path = request.get_full_path()
    strip_trailing_slash = path.endswith('/') and path != '/'
    if strip_trailing_slash:
        path = path[:-1]
    member = '%s %s' % (request.method, iri_to_uri(path))
    connection.zincrby(prefix, member, 1)
 def handle(self, **options):
     """Expire the hit/miss counters and record when counting restarted."""
     connection = get_redis_connection()
     counter_keys = (
         'plog:hits',
         'plog:misses',
         'homepage:hits',
         'homepage:misses',
     )
     # A one-second TTL makes Redis drop each counter key almost at once.
     for key in counter_keys:
         connection.expire(key, 1)
     restarted_at = str(datetime.datetime.utcnow())
     connection.set('counters-start', restarted_at)
Ejemplo n.º 3
0
def autocomplete(request):
    """Return up to 10 title-search results for the ``q`` query parameter.

    A missing or empty ``q`` yields an empty list without touching Redis.
    """
    term = request.GET.get('q')
    if term:
        index = RedisSearchIndex(get_redis_connection('titles'))
        return index.search(term, n=10)
    return []
Ejemplo n.º 4
0
def stats_index(request):
    """Render the cache hit/miss statistics page.

    Reads the ``<prefix>:hits`` and ``<prefix>:misses`` sorted sets for the
    ``plog`` and ``homepage`` prefixes, aggregates the counts per prefix
    and per URI, and renders them with the ``stats/index.html`` template.
    """
    data = {}
    redis = get_redis_connection()
    # uri -> {'hits': int, 'misses': int}, accumulated across all prefixes.
    urls = {}

    def get_totals(prefix):
        """Return the totals for ``prefix`` and fold its counts into ``urls``."""
        totals = {'hits': 0, 'misses': 0}
        for kind in ('hits', 'misses'):
            # Only the highest-scoring entries (ranks 0 through 100) are read.
            entries = redis.zrevrange(
                '%s:%s' % (prefix, kind), 0, 100, withscores=True
            )
            for uri, count in entries:
                count = int(count)
                totals[kind] += count
                if uri not in urls:
                    urls[uri] = {'hits': 0, 'misses': 0}
                urls[uri][kind] += count
        if totals['hits']:
            total_ratio = round(100.0 * totals['misses'] / totals['hits'], 1)
        else:
            # No hits recorded: avoid dividing by zero and show nothing.
            total_ratio = ''
        return {
            'total_hits': totals['hits'],
            'total_misses': totals['misses'],
            'total_ratio': total_ratio
        }

    data['plog'] = get_totals('plog')
    data['homepage'] = get_totals('homepage')

    total_hits = total_misses = 0
    for v in urls.values():
        total_hits += v['hits']
        total_misses += v['misses']
        if v['hits']:
            v['ratio'] = '%.1f%%' % (100.0 * v['misses'] / v['hits'])
        else:
            v['ratio'] = '--'

    def make_abs_url(url):
        """Return the path part of a ``'GET <path>'`` key, else ``None``."""
        if url.startswith('GET '):
            return url[4:]
        return None

    urls = [(make_abs_url(x), x, y['hits'], y['misses'], y['ratio'])
            for x, y in urls.items()]
    # Key-based sort replaces the cmp-style call, which Python 3 removed.
    # NOTE(review): index 1 is the raw "METHOD path" key, so rows end up in
    # reverse alphabetical order -- confirm whether sorting by hits
    # (index 2) was intended.
    urls.sort(key=lambda row: row[1], reverse=True)
    data['urls'] = urls

    data['start_date'] = redis.get('counters-start')
    data['total_hits'] = total_hits
    data['total_misses'] = total_misses
    data['page_title'] = 'Stats'
    return render(request, 'stats/index.html', data)
Ejemplo n.º 5
0
    def handle(self, *args, **options):
        """Search the 'titles' index for the words in ``args`` and print the outcome.

        Prints the query, how long the search took, the terms reported by
        the index, and one tab-indented line per result.
        """
        connection = get_redis_connection('titles')
        search_index = RedisSearchIndex(connection)

        query = u' '.join(args)
        # Each line is printed as one pre-formatted string so the output is
        # the same whether ``print`` is a statement or a function.
        print("QUERY: %s" % repr(query))
        t0 = time.time()
        results = search_index.search(query)
        t1 = time.time()
        print("In %s seconds" % (t1 - t0))
        # Wrapped in a 1-tuple so a tuple-valued 'terms' is not unpacked
        # as format arguments.
        print("TERMS: %s" % (results['terms'],))
        for oid, score, title in results['results']:
            print("\t%s %s %s" % (oid.ljust(4), score, repr(title)))
Ejemplo n.º 6
0
def add_post(request):
    """Create a new blog post from a submitted ``BlogForm``.

    Only staff or superusers may call this view; anyone else triggers an
    ``AssertionError``.  On a valid POST the post is created, its categories
    are attached, its keywords are registered in Redis, and the client is
    redirected to the post's edit page.  Otherwise the form is rendered
    with the ``plog/edit.html`` template.
    """
    context = {}
    user = request.user
    # Raised explicitly (instead of using an ``assert`` statement) so the
    # access check is not stripped when Python runs with -O.
    if not (user.is_staff or user.is_superuser):
        raise AssertionError('user must be staff or superuser')
    if request.method == 'POST':
        form = BlogForm(data=request.POST)
        if form.is_valid():
            # One keyword per non-blank line of the form field.
            keywords = [
                x.strip() for x
                in form.cleaned_data['keywords'].splitlines()
                if x.strip()
            ]
            blogitem = BlogItem.objects.create(
                oid=form.cleaned_data['oid'],
                title=form.cleaned_data['title'],
                text=form.cleaned_data['text'],
                summary=form.cleaned_data['summary'],
                display_format=form.cleaned_data['display_format'],
                codesyntax=form.cleaned_data['codesyntax'],
                url=form.cleaned_data['url'],
                pub_date=form.cleaned_data['pub_date'],
                keywords=keywords,
            )
            for category in form.cleaned_data['categories']:
                blogitem.categories.add(category)
            blogitem.save()

            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                # Always register the post under its keyword.  Previously
                # the post was only added when the keyword's set was empty,
                # so later posts sharing a keyword were never indexed.
                # SADD returns the number of members actually added, so
                # 'kwcount' only grows for real additions.
                if redis.sadd('kw:%s' % keyword, blogitem.pk):
                    redis.incr('kwcount')

            url = reverse('edit_post', args=[blogitem.oid])
            return redirect(url)
    else:
        # Default the publication date to one hour from now.
        initial = {
            'pub_date': utc_now() + datetime.timedelta(seconds=60 * 60),
            'display_format': 'markdown',
        }
        form = BlogForm(initial=initial)
    context['form'] = form
    context['page_title'] = 'Add post'
    context['blogitem'] = None
    return render(request, 'plog/edit.html', context)
Ejemplo n.º 7
0
def _get_related_pks(post):
    """Return primary keys of posts sharing keywords with ``post``, best first.

    Each shared keyword contributes a weight that shrinks with the keyword's
    position in ``post.keywords``.  The keyword sets in Redis are built from
    all already-published posts when the ``kwcount`` key is missing or zero.
    """
    conn = get_redis_connection(reconnection_wrapped=True)
    if not conn.get("kwcount"):
        # Keyword index looks unpopulated: build it from published posts.
        for item in BlogItem.objects.filter(pub_date__lt=utc_now()):
            for kw in item.keywords:
                conn.sadd("kw:%s" % kw, item.pk)
                conn.incr("kwcount")

    own_keywords = post.keywords
    scores = defaultdict(int)
    for position, kw in enumerate(own_keywords):
        for raw_pk in conn.smembers("kw:%s" % kw):
            other_pk = int(raw_pk)
            if other_pk == post.pk:
                continue
            # Earlier keywords weigh more than later ones.
            scores[other_pk] += len(own_keywords) - position
    # Highest score first; ties are broken by the larger primary key.
    return sorted(scores, key=lambda pk: (scores[pk], pk), reverse=True)
Ejemplo n.º 8
0
    def handle(self, *args, **options):
        now = utc_now()
        verbose = int(options['verbosity']) > 1

        connection = get_redis_connection('titles')
        connection.flushdb()
        search_index = RedisSearchIndex(connection)

        for plog in models.BlogItem.objects.filter(pub_date__lte=now).order_by('?'):
            if verbose:
                print repr(plog.title),
            # print search_index.add_item(plog.id, plog.title, 1)
            try:
                hits = models.BlogItemHits.objects.get(oid=plog.oid).hits
            except models.BlogItemHits.DoesNotExist:
                hits = 1
            result = search_index.add_item(plog.oid, plog.title, hits), hits
            if verbose:
                print result
Ejemplo n.º 9
0
def edit_post(request, oid):
    """Edit the blog post identified by ``oid``.

    Responds with a 404 when no such post exists.  Only staff or superusers
    may proceed; anyone else triggers an ``AssertionError``.  On a valid
    POST the post is updated from the form, its categories are replaced,
    its keywords are registered in Redis, and the client is redirected back
    to the edit page.  Otherwise the form is rendered with the
    ``plog/edit.html`` template.
    """
    blogitem = get_object_or_404(BlogItem, oid=oid)
    data = {}
    user = request.user
    # Raised explicitly (instead of using an ``assert`` statement) so the
    # access check is not stripped when Python runs with -O.
    if not (user.is_staff or user.is_superuser):
        raise AssertionError('user must be staff or superuser')
    if request.method == 'POST':
        form = BlogForm(instance=blogitem, data=request.POST)
        if form.is_valid():
            blogitem.oid = form.cleaned_data['oid']
            blogitem.title = form.cleaned_data['title']
            blogitem.text = form.cleaned_data['text']
            # NOTE(review): presumably blanked so the rendered text is
            # regenerated from the new text -- confirm against the model.
            blogitem.text_rendered = ''
            blogitem.summary = form.cleaned_data['summary']
            blogitem.display_format = form.cleaned_data['display_format']
            blogitem.codesyntax = form.cleaned_data['codesyntax']
            blogitem.pub_date = form.cleaned_data['pub_date']
            # One keyword per non-blank line of the form field.
            keywords = [x.strip() for x
                        in form.cleaned_data['keywords'].splitlines()
                        if x.strip()]
            blogitem.keywords = keywords
            blogitem.categories.clear()
            for category in form.cleaned_data['categories']:
                blogitem.categories.add(category)
            blogitem.save()

            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                # Always register the post under its keyword.  Previously
                # the post was only added when the keyword's set was empty,
                # so posts sharing a keyword were never indexed.  SADD
                # returns the number of members actually added, so
                # re-saving a post does not inflate 'kwcount'.
                if redis.sadd('kw:%s' % keyword, blogitem.pk):
                    redis.incr('kwcount')

            url = reverse('edit_post', args=[blogitem.oid])
            return redirect(url)

    else:
        form = BlogForm(instance=blogitem)
    data['form'] = form
    data['page_title'] = 'Edit post'
    data['blogitem'] = blogitem
    data['INBOUND_EMAIL_ADDRESS'] = settings.INBOUND_EMAIL_ADDRESS
    return render(request, 'plog/edit.html', data)
Ejemplo n.º 10
0
def edit_post(request, oid):
    """Edit the blog post identified by ``oid``.

    Responds with a 404 when no such post exists.  Only staff or superusers
    may proceed; anyone else triggers an ``AssertionError``.  On a valid
    POST the post is updated from the form, its categories are replaced,
    its keywords are registered in Redis, and the client is redirected back
    to the edit page.  Otherwise the form is rendered with the
    ``plog/edit.html`` template.
    """
    blogitem = get_object_or_404(BlogItem, oid=oid)
    data = {}
    user = request.user
    # Raised explicitly (instead of using an ``assert`` statement) so the
    # access check is not stripped when Python runs with -O.
    if not (user.is_staff or user.is_superuser):
        raise AssertionError("user must be staff or superuser")
    if request.method == "POST":
        form = BlogForm(instance=blogitem, data=request.POST)
        if form.is_valid():
            blogitem.oid = form.cleaned_data["oid"]
            blogitem.title = form.cleaned_data["title"]
            blogitem.text = form.cleaned_data["text"]
            # NOTE(review): presumably blanked so the rendered text is
            # regenerated from the new text -- confirm against the model.
            blogitem.text_rendered = ""
            blogitem.summary = form.cleaned_data["summary"]
            blogitem.display_format = form.cleaned_data["display_format"]
            blogitem.codesyntax = form.cleaned_data["codesyntax"]
            blogitem.pub_date = form.cleaned_data["pub_date"]
            # One keyword per non-blank line of the form field.
            keywords = [x.strip() for x in form.cleaned_data["keywords"].splitlines() if x.strip()]
            blogitem.keywords = keywords
            blogitem.categories.clear()
            for category in form.cleaned_data["categories"]:
                blogitem.categories.add(category)
            blogitem.save()

            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                # Always register the post under its keyword.  Previously
                # the post was only added when the keyword's set was empty,
                # so posts sharing a keyword were never indexed.  SADD
                # returns the number of members actually added, so
                # re-saving a post does not inflate "kwcount".
                if redis.sadd("kw:%s" % keyword, blogitem.pk):
                    redis.incr("kwcount")

            url = reverse("edit_post", args=[blogitem.oid])
            return redirect(url)

    else:
        form = BlogForm(instance=blogitem)
    data["form"] = form
    data["page_title"] = "Edit post"
    data["blogitem"] = blogitem
    data["INBOUND_EMAIL_ADDRESS"] = settings.INBOUND_EMAIL_ADDRESS
    return render(request, "plog/edit.html", data)
Ejemplo n.º 11
0
def add_post(request):
    """Create a new blog post from a submitted ``BlogForm``.

    Only staff or superusers may call this view; anyone else triggers an
    ``AssertionError``.  On a valid POST the post is created, its categories
    are attached, its keywords are registered in Redis, and the client is
    redirected to the post's edit page.  Otherwise the form is rendered
    with the ``plog/edit.html`` template.
    """
    data = {}
    user = request.user
    # Raised explicitly (instead of using an ``assert`` statement) so the
    # access check is not stripped when Python runs with -O.
    if not (user.is_staff or user.is_superuser):
        raise AssertionError("user must be staff or superuser")
    if request.method == "POST":
        form = BlogForm(data=request.POST)
        if form.is_valid():
            # One keyword per non-blank line of the form field.
            keywords = [x.strip() for x in form.cleaned_data["keywords"].splitlines() if x.strip()]
            blogitem = BlogItem.objects.create(
                oid=form.cleaned_data["oid"],
                title=form.cleaned_data["title"],
                text=form.cleaned_data["text"],
                summary=form.cleaned_data["summary"],
                display_format=form.cleaned_data["display_format"],
                codesyntax=form.cleaned_data["codesyntax"],
                url=form.cleaned_data["url"],
                pub_date=form.cleaned_data["pub_date"],
                keywords=keywords,
            )
            for category in form.cleaned_data["categories"]:
                blogitem.categories.add(category)
            blogitem.save()

            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                # Always register the post under its keyword.  Previously
                # the post was only added when the keyword's set was empty,
                # so later posts sharing a keyword were never indexed.
                # SADD returns the number of members actually added, so
                # "kwcount" only grows for real additions.
                if redis.sadd("kw:%s" % keyword, blogitem.pk):
                    redis.incr("kwcount")

            url = reverse("edit_post", args=[blogitem.oid])
            return redirect(url)
    else:
        # Default the publication date to one hour from now.
        initial = {"pub_date": utc_now() + datetime.timedelta(seconds=60 * 60), "display_format": "markdown"}
        form = BlogForm(initial=initial)
    data["form"] = form
    data["page_title"] = "Add post"
    data["blogitem"] = None
    return render(request, "plog/edit.html", data)
Ejemplo n.º 12
0
def search(request):
    """Search blog posts and comments for the ``q`` query parameter.

    Free-text words are matched with ``to_tsvector``/``to_tsquery`` SQL
    against post titles and texts and against comment texts.  Keyword and
    category filters, as split off the query by ``split_search``, are
    resolved through the ``kw:<keyword>`` Redis sets and the ``Category``
    model.  At most 20 matches per query are collected for display, with
    highlighted excerpts.  When nothing matches and the query is a single
    word that is also a known keyword, the client is redirected to the
    equivalent ``keyword:`` search.  Queries longer than 90 characters are
    rejected with a plain-text response.
    """
    data = {}
    search = request.GET.get('q', '')
    if len(search) > 90:
        return http.HttpResponse("Search too long")
    documents = []
    data['base_url'] = 'http://%s' % RequestSite(request).domain
    tag_strip = re.compile('<[^>]+>')

    def append_match(item, words):
        """Append a result entry for ``item`` (post or comment) to ``documents``."""

        text = item.rendered
        text = tag_strip.sub(' ', text)

        sentences = []

        def matcher(match):
            return '<b>%s</b>' % match.group()

        if regex:
            for each in regex.finditer(text):
                # Excerpt of roughly 35 characters before and 40 after the hit.
                sentence = text[max(each.start() - 35, 0): each.end() + 40]
                sentence = regex_ext.sub(matcher, sentence)
                sentence = sentence.strip()
                if each.start() > 0 and not sentence[0].isupper():
                    sentence = '...%s' % sentence
                if each.end() < len(text):
                    sentence = '%s...' % sentence
                sentences.append(sentence.strip())
                if len(sentences) > 3:
                    break

        if isinstance(item, BlogItem):
            title = html_escape(item.title)
            if regex_ext:
                title = regex_ext.sub(matcher, title)
            date = item.pub_date
            type_ = 'blog'
        else:
            if not item.blogitem:
                item.correct_blogitem_parent()
            title = (
                "Comment on <em>%s</em>" % html_escape(item.blogitem.title)
            )
            date = item.add_date
            type_ = 'comment'

        documents.append({
            'title': title,
            'summary': '<br>'.join(sentences),
            'date': date,
            'url': item.get_absolute_url(),
            'type': type_,
        })

    def create_search(s):
        """Return ``(tsquery_string, plain_words)`` built from the raw query ``s``."""
        # Raw string: '\w' in a plain literal is an invalid escape sequence.
        words = re.findall(r'\w+', s)
        words_orig = words[:]

        if 'or' in words:
            which = words.index('or')
            words_orig.remove('or')
            if (which + 1) < len(words) and which > 0:
                # Collapse "<before> or <after>" into a single OR term.
                before = words.pop(which - 1)
                words.pop(which - 1)
                after = words.pop(which - 1)
                words.insert(which - 1, '%s | %s' % (before, after))
        while 'and' in words_orig:
            words_orig.remove('and')
        while 'and' in words:
            words.remove('and')

        escaped = ' & '.join(words)
        return escaped, words_orig

    data['q'] = search

    keyword_search = {}
    if len(search) > 1:
        _keyword_keys = ('keyword', 'keywords', 'category', 'categories')
        search, keyword_search = split_search(search, _keyword_keys)
    redis = get_redis_connection(reconnection_wrapped=True)

    # model name -> primary keys already collected, to avoid duplicates
    # when a later field of the same model matches the same row.
    not_ids = defaultdict(set)
    times = []
    search_times = []
    count_documents = []
    regex = regex_ext = None

    def append_queryset_search(queryset, order_by, words, model_name):
        """Record the match count of ``queryset`` and collect its first 20 rows."""
        # Use the ``queryset`` argument; the previous code read the
        # enclosing-scope ``items`` variable and ignored its parameter.
        count = queryset.count()
        count_documents.append(count)
        for item in queryset.order_by(order_by)[:20]:
            append_match(item, words)
            not_ids[model_name].add(item.pk)
        return count

    now = utc_now()

    if len(search) > 1:
        search_escaped, words = create_search(search)
        highlight = '|'.join(
            re.escape(word)
            for word in words
            if word.lower() not in STOPWORDS
        )
        # Only build the highlight patterns when at least one word is left;
        # an empty alternation would match at every word boundary.
        if highlight:
            regex = re.compile(r'\b(%s)' % highlight, re.I | re.U)
            regex_ext = re.compile(r'\b(%s\w*)\b' % highlight, re.I | re.U)

        for model in (BlogItem, BlogComment):
            qs = model.objects
            model_name = model._meta.object_name
            if model == BlogItem:
                qs = qs.filter(pub_date__lte=now)
                fields = ('title', 'text')
                order_by = '-pub_date'
                if keyword_search.get('keyword'):
                    # use Redis!
                    ids = redis.smembers('kw:%s' % keyword_search['keyword'])
                    if ids:
                        qs = qs.filter(pk__in=ids)
                if keyword_search.get('keywords'):
                    # use Redis!
                    ids = []
                    for each in [x.strip() for x
                                 in keyword_search['keywords'].split(',')
                                 if x.strip()]:
                        ids.extend(redis.smembers('kw:%s' % each))
                    if ids:
                        qs = qs.filter(pk__in=ids)
            elif model == BlogComment:
                fields = ('comment',)
                order_by = '-add_date'
                _specials = ('keyword', 'keywords', 'category', 'categories')
                if any(keyword_search.get(k) for k in _specials):
                    # BlogComments don't have this keyword so it can
                    # never match
                    continue

            for field in fields:
                if not_ids[model_name]:
                    qs = qs.exclude(pk__in=not_ids[model_name])
                # ``field`` comes from the fixed tuples above, never from
                # user input; the search text itself is passed as a
                # query parameter.
                _sql = "to_tsvector('english'," + field + ") "
                if ' | ' in search_escaped or ' & ' in search_escaped:
                    _sql += "@@ to_tsquery('english', %s)"
                else:
                    _sql += "@@ plainto_tsquery('english', %s)"
                items = qs.extra(where=[_sql], params=[search_escaped])

                t0 = time.time()
                count = append_queryset_search(
                    items, order_by, words, model_name
                )
                t1 = time.time()
                times.append('%s to find %s %ss by field %s' % (
                    t1 - t0,
                    count,
                    model_name,
                    field
                ))
                search_times.append(t1-t0)

        logger.info('Searchin for %r:\n%s', search, '\n'.join(times))
    elif keyword_search and any(keyword_search.values()):
        t0 = time.time()
        if keyword_search.get('keyword') or keyword_search.get('keywords'):
            if keyword_search.get('keyword'):
                ids = redis.smembers('kw:%s' % keyword_search['keyword'])
            else:
                ids = []
                for each in [x.strip() for x
                             in keyword_search.get('keywords').split(',')
                             if x.strip()]:
                    ids.extend(redis.smembers('kw:%s' % each))
            if ids:
                items = BlogItem.objects.filter(pk__in=ids)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)

        if keyword_search.get('category') or keyword_search.get('categories'):
            if keyword_search.get('category'):
                categories = Category.objects.filter(
                    name=keyword_search.get('category')
                )
            else:
                cats = [x.strip() for x
                        in keyword_search.get('categories').split(',')
                        if x.strip()]
                categories = Category.objects.filter(name__in=cats)
            if categories:
                cat_q = make_categories_q(categories)
                items = BlogItem.objects.filter(cat_q)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        t1 = time.time()
        search_times.append(t1 - t0)

    data['search_time'] = sum(search_times)
    count_documents_shown = len(documents)
    data['documents'] = documents
    data['count_documents'] = sum(count_documents)
    data['count_documents_shown'] = count_documents_shown
    data['better'] = None
    if not data['count_documents']:
        # Nothing found: suggest an OR-search for short multi-word queries.
        _qterms = len(data['q'].split())
        if ' or ' not in data['q'] and _qterms > 1 and _qterms < 5:
            data['better'] = data['q'].replace(' ', ' or ')
    if data['better']:
        data['better_url'] = (
            reverse('search') + '?' +
            urllib.urlencode({'q': data['better'].encode('utf-8')})
        )

    if not data['q']:
        page_title = 'Search'
    elif data['count_documents'] == 1:
        page_title = '1 thing found'
    else:
        page_title = '%s things found' % data['count_documents']
    if count_documents_shown < data['count_documents']:
        if count_documents_shown == 1:
            page_title += ' (but only 1 thing shown)'
        else:
            page_title += ' (but only %s things shown)' % count_documents_shown
    data['page_title'] = page_title

    if (
        not data['count_documents'] and
        len(search.split()) == 1 and not keyword_search
    ):
        # A single unmatched word that is a known keyword: retry the
        # search as a keyword search.
        if redis.smembers('kw:%s' % search):
            url = reverse('search')
            url += '?' + urllib.urlencode({'q': 'keyword:%s' % search})
            return redirect(url)

    return render(request, 'homepage/search.html', data)
Ejemplo n.º 13
0
 def setUp(self):
     """Run the parent set-up, then start from an empty Redis database."""
     super(PlogTestCase, self).setUp()
     connection = self.redis = get_redis_connection()
     connection.flushdb()
Ejemplo n.º 14
0
 def setUp(self):
     """Start every test from an empty Redis database."""
     connection = self.redis = get_redis_connection()
     connection.flushdb()