def redis_increment(prefix, request):
    """Bump the per-URL counter in the Redis sorted set named *prefix*.

    The set member is "<METHOD> <path>"; a trailing slash (except on the
    root URL "/") is stripped so "/foo" and "/foo/" count as one URL.
    """
    redis = get_redis_connection()
    path = request.get_full_path()
    # Normalize away a trailing slash, but leave the bare root URL alone.
    if path.endswith('/') and path != '/':
        path = path[:-1]
    member = '%s %s' % (request.method, iri_to_uri(path))
    redis.zincrby(prefix, member, 1)
def handle(self, **options):
    """Reset the cache hit/miss counters and record a fresh start time."""
    redis = get_redis_connection()
    # A 1-second TTL effectively wipes each counter key almost immediately,
    # so counting restarts from zero.
    for key in ('plog:hits', 'plog:misses', 'homepage:hits', 'homepage:misses'):
        redis.expire(key, 1)
    redis.set('counters-start', str(datetime.datetime.utcnow()))
def autocomplete(request):
    """Return up to 10 title-index matches for the ``?q=`` parameter.

    A missing or empty query short-circuits to an empty list.
    """
    query = request.GET.get('q')
    if not query:
        return []
    search_index = RedisSearchIndex(get_redis_connection('titles'))
    return search_index.search(query, n=10)
def stats_index(request):
    """Render cache hit/miss statistics collected in Redis.

    Reads the top-100 entries of the ``<prefix>:hits`` and
    ``<prefix>:misses`` sorted sets for the 'plog' and 'homepage'
    prefixes, aggregates per-URL and overall totals, and renders
    ``stats/index.html``.
    """
    data = {}
    redis = get_redis_connection()
    urls = {}

    def get_totals(prefix):
        # Sum the top-100 hit/miss counters for this prefix and fold the
        # per-URL numbers into the shared `urls` dict.
        total_hits = total_misses = 0
        for uri, count in redis.zrevrange('%s:hits' % prefix, 0, 100,
                                          withscores=True):
            count = int(count)
            total_hits += count
            if uri not in urls:
                urls[uri] = {'hits': 0, 'misses': 0}
            urls[uri]['hits'] += count
        for uri, count in redis.zrevrange('%s:misses' % prefix, 0, 100,
                                          withscores=True):
            count = int(count)
            total_misses += count
            if uri not in urls:
                urls[uri] = {'hits': 0, 'misses': 0}
            urls[uri]['misses'] += count
        # NOTE(review): "ratio" here is misses as a percentage of hits;
        # it is '' (not 0) when there were no hits at all.
        if total_hits:
            total_ratio = round(100.0 * total_misses / total_hits, 1)
        else:
            total_ratio = ''
        return {
            'total_hits': total_hits,
            'total_misses': total_misses,
            'total_ratio': total_ratio,
        }

    data['plog'] = get_totals('plog')
    data['homepage'] = get_totals('homepage')

    total_hits = total_misses = 0
    for v in urls.values():
        total_hits += v['hits']
        total_misses += v['misses']
        if v['hits']:
            v['ratio'] = '%.1f%%' % (100.0 * v['misses'] / v['hits'])
        else:
            v['ratio'] = '--'

    def make_abs_url(url):
        # Only GET entries map back to a fetchable absolute URL.
        if url.startswith('GET '):
            return url[4:]
        return None

    urls = [
        (make_abs_url(x), x, y['hits'], y['misses'], y['ratio'])
        for x, y in urls.items()
    ]
    # Sort by the "METHOD /path" label, descending.  The key= form replaces
    # the Python-2-only `sort(lambda x, y: cmp(y[1], x[1]))` comparator,
    # which no longer exists in Python 3; ordering is identical.
    urls.sort(key=lambda entry: entry[1], reverse=True)
    data['urls'] = urls
    data['start_date'] = redis.get('counters-start')
    data['total_hits'] = total_hits
    data['total_misses'] = total_misses
    data['page_title'] = 'Stats'
    return render(request, 'stats/index.html', data)
def handle(self, *args, **options): now = utc_now() connection = get_redis_connection('titles') search_index = RedisSearchIndex(connection) query = u' '.join(args) print "QUERY:", repr(query) t0 = time.time() results = search_index.search(query) t1 = time.time() print "In", t1 - t0, "seconds" print "TERMS:", results['terms'] for id, score, title in results['results']: print "\t", id.ljust(4), score, repr(title)
def add_post(request):
    """Staff-only view that creates a new BlogItem from a BlogForm POST.

    On success, registers each keyword in Redis and redirects to the
    edit page for the new post; otherwise re-renders the form.
    """
    context = {}
    user = request.user
    assert user.is_staff or user.is_superuser
    if request.method != 'POST':
        # Fresh form: default to publishing in one hour, markdown format.
        form = BlogForm(initial={
            'pub_date': utc_now() + datetime.timedelta(seconds=60 * 60),
            'display_format': 'markdown',
        })
    else:
        form = BlogForm(data=request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            keywords = [
                line.strip()
                for line in cleaned['keywords'].splitlines()
                if line.strip()
            ]
            blogitem = BlogItem.objects.create(
                oid=cleaned['oid'],
                title=cleaned['title'],
                text=cleaned['text'],
                summary=cleaned['summary'],
                display_format=cleaned['display_format'],
                codesyntax=cleaned['codesyntax'],
                url=cleaned['url'],
                pub_date=cleaned['pub_date'],
                keywords=keywords,
            )
            for category in cleaned['categories']:
                blogitem.categories.add(category)
            blogitem.save()
            # Register previously-unseen keywords in the Redis index.
            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                if not redis.smembers('kw:%s' % keyword):
                    redis.sadd('kw:%s' % keyword, blogitem.pk)
                    redis.incr('kwcount')
            return redirect(reverse('edit_post', args=[blogitem.oid]))
    context['form'] = form
    context['page_title'] = 'Add post'
    context['blogitem'] = None
    return render(request, 'plog/edit.html', context)
def _get_related_pks(post):
    """Return pks of other published posts sharing keywords with *post*,
    most related first.
    """
    redis = get_redis_connection(reconnection_wrapped=True)
    # Lazily rebuild the keyword -> post-pk sets if the counter is gone.
    if not redis.get("kwcount"):
        for item in BlogItem.objects.filter(pub_date__lt=utc_now()):
            for keyword in item.keywords:
                redis.sadd("kw:%s" % keyword, item.pk)
                redis.incr("kwcount")
    keywords = post.keywords
    scores = defaultdict(int)
    # Earlier keywords weigh more: each match adds (len - position).
    for position, keyword in enumerate(keywords):
        for pk in redis.smembers("kw:%s" % keyword):
            pk = int(pk)
            if pk != post.pk:
                scores[pk] += len(keywords) - position
    ranked = sorted(
        ((score, pk) for pk, score in scores.items()),
        reverse=True,
    )
    return [pk for _, pk in ranked]
def handle(self, *args, **options): now = utc_now() verbose = int(options['verbosity']) > 1 connection = get_redis_connection('titles') connection.flushdb() search_index = RedisSearchIndex(connection) for plog in models.BlogItem.objects.filter(pub_date__lte=now).order_by('?'): if verbose: print repr(plog.title), # print search_index.add_item(plog.id, plog.title, 1) try: hits = models.BlogItemHits.objects.get(oid=plog.oid).hits except models.BlogItemHits.DoesNotExist: hits = 1 result = search_index.add_item(plog.oid, plog.title, hits), hits if verbose: print result
def edit_post(request, oid):
    """Staff-only view for editing an existing BlogItem.

    On a valid POST, updates the item, re-registers its keywords in
    Redis and redirects back to this edit page.
    """
    blogitem = get_object_or_404(BlogItem, oid=oid)
    data = {}
    user = request.user
    assert user.is_staff or user.is_superuser
    if request.method == 'POST':
        form = BlogForm(instance=blogitem, data=request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            for attr in ('oid', 'title', 'text', 'summary',
                         'display_format', 'codesyntax', 'pub_date'):
                setattr(blogitem, attr, cleaned[attr])
            blogitem.text_rendered = ''  # force re-rendering of the body
            keywords = [
                line.strip()
                for line in cleaned['keywords'].splitlines()
                if line.strip()
            ]
            blogitem.keywords = keywords
            blogitem.categories.clear()
            for category in cleaned['categories']:
                blogitem.categories.add(category)
            blogitem.save()
            # Register previously-unseen keywords in the Redis index.
            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                if not redis.smembers('kw:%s' % keyword):
                    redis.sadd('kw:%s' % keyword, blogitem.pk)
                    redis.incr('kwcount')
            return redirect(reverse('edit_post', args=[blogitem.oid]))
    else:
        form = BlogForm(instance=blogitem)
    data['form'] = form
    data['page_title'] = 'Edit post'
    data['blogitem'] = blogitem
    data['INBOUND_EMAIL_ADDRESS'] = settings.INBOUND_EMAIL_ADDRESS
    return render(request, 'plog/edit.html', data)
def edit_post(request, oid):
    """Staff-only edit view: update a BlogItem from a submitted BlogForm."""
    blogitem = get_object_or_404(BlogItem, oid=oid)
    data = {}
    user = request.user
    assert user.is_staff or user.is_superuser
    if request.method == "POST":
        form = BlogForm(instance=blogitem, data=request.POST)
        if form.is_valid():
            cd = form.cleaned_data
            blogitem.oid = cd["oid"]
            blogitem.title = cd["title"]
            blogitem.text = cd["text"]
            # Clearing the cached rendering forces it to be regenerated.
            blogitem.text_rendered = ""
            blogitem.summary = cd["summary"]
            blogitem.display_format = cd["display_format"]
            blogitem.codesyntax = cd["codesyntax"]
            blogitem.pub_date = cd["pub_date"]
            blogitem.keywords = [
                piece.strip()
                for piece in cd["keywords"].splitlines()
                if piece.strip()
            ]
            blogitem.categories.clear()
            for category in cd["categories"]:
                blogitem.categories.add(category)
            blogitem.save()
            # Make sure every keyword has a Redis set pointing at a post.
            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in blogitem.keywords:
                if not redis.smembers("kw:%s" % keyword):
                    redis.sadd("kw:%s" % keyword, blogitem.pk)
                    redis.incr("kwcount")
            return redirect(reverse("edit_post", args=[blogitem.oid]))
    else:
        form = BlogForm(instance=blogitem)
    data["form"] = form
    data["page_title"] = "Edit post"
    data["blogitem"] = blogitem
    data["INBOUND_EMAIL_ADDRESS"] = settings.INBOUND_EMAIL_ADDRESS
    return render(request, "plog/edit.html", data)
def add_post(request):
    """Staff-only view that creates a new BlogItem from a BlogForm."""
    data = {}
    user = request.user
    assert user.is_staff or user.is_superuser
    if request.method == "POST":
        form = BlogForm(data=request.POST)
        if form.is_valid():
            cd = form.cleaned_data
            keywords = [
                k.strip() for k in cd["keywords"].splitlines() if k.strip()
            ]
            field_names = (
                "oid", "title", "text", "summary",
                "display_format", "codesyntax", "url", "pub_date",
            )
            create_kwargs = dict((name, cd[name]) for name in field_names)
            blogitem = BlogItem.objects.create(keywords=keywords,
                                               **create_kwargs)
            for category in cd["categories"]:
                blogitem.categories.add(category)
            blogitem.save()
            # Seed the Redis keyword index for any new keywords.
            redis = get_redis_connection(reconnection_wrapped=True)
            for keyword in keywords:
                if not redis.smembers("kw:%s" % keyword):
                    redis.sadd("kw:%s" % keyword, blogitem.pk)
                    redis.incr("kwcount")
            return redirect(reverse("edit_post", args=[blogitem.oid]))
    else:
        # Fresh form: default pub_date one hour out, markdown format.
        form = BlogForm(initial={
            "pub_date": utc_now() + datetime.timedelta(seconds=60 * 60),
            "display_format": "markdown",
        })
    data["form"] = form
    data["page_title"] = "Add post"
    data["blogitem"] = None
    return render(request, "plog/edit.html", data)
def search(request):
    """Full-text search over BlogItem/BlogComment, plus keyword/category
    lookups backed by Redis sets.

    Supports a plain query (?q=...), postgres tsquery syntax built from
    the words (with 'or'/'and' treated as operators), and special
    keyword:/keywords:/category:/categories: prefixes extracted by
    split_search().  Renders homepage/search.html.
    """
    data = {}
    search = request.GET.get('q', '')
    if len(search) > 90:
        # Guard against absurdly long queries.
        return http.HttpResponse("Search too long")
    documents = []
    data['base_url'] = 'http://%s' % RequestSite(request).domain
    tag_strip = re.compile('<[^>]+>')

    def append_match(item, words):
        # Build a highlighted result entry for one BlogItem/BlogComment
        # and append it to the closed-over `documents` list.  Reads the
        # closed-over `regex` / `regex_ext` patterns (may be None).
        text = item.rendered
        text = tag_strip.sub(' ', text)
        sentences = []

        def matcher(match):
            return '<b>%s</b>' % match.group()

        if regex:
            # Collect up to 4 highlighted snippets around each match.
            for each in regex.finditer(text):
                sentence = text[max(each.start() - 35, 0): each.end() + 40]
                sentence = regex_ext.sub(matcher, sentence)
                sentence = sentence.strip()
                if each.start() > 0 and not sentence[0].isupper():
                    sentence = '...%s' % sentence
                if each.end() < len(text):
                    sentence = '%s...' % sentence
                sentences.append(sentence.strip())
                if len(sentences) > 3:
                    break
        if isinstance(item, BlogItem):
            title = html_escape(item.title)
            if regex_ext:
                title = regex_ext.sub(matcher, title)
            date = item.pub_date
            type_ = 'blog'
        else:
            # BlogComment: repair a missing parent pointer, then label
            # the result as a comment on its blog item.
            if not item.blogitem:
                item.correct_blogitem_parent()
            title = (
                "Comment on <em>%s</em>" % html_escape(item.blogitem.title)
            )
            date = item.add_date
            type_ = 'comment'
        documents.append({
            'title': title,
            'summary': '<br>'.join(sentences),
            'date': date,
            'url': item.get_absolute_url(),
            'type': type_,
        })

    def create_search(s):
        # Turn the raw query into (tsquery_string, display_words).
        # A single infix 'or' becomes the tsquery '|' operator; 'and'
        # words are dropped and the rest joined with '&'.
        words = re.findall('\w+', s)
        words_orig = words[:]
        if 'or' in words:
            which = words.index('or')
            words_orig.remove('or')
            if (which + 1) < len(words) and which > 0:
                before = words.pop(which - 1)
                words.pop(which - 1)
                after = words.pop(which - 1)
                words.insert(which - 1, '%s | %s' % (before, after))
        while 'and' in words_orig:
            words_orig.remove('and')
        while 'and' in words:
            words.remove('and')
        escaped = ' & '.join(words)
        return escaped, words_orig

    data['q'] = search
    keyword_search = {}
    if len(search) > 1:
        # Split off keyword:/category:-style filters from the free text.
        _keyword_keys = ('keyword', 'keywords', 'category', 'categories')
        search, keyword_search = split_search(search, _keyword_keys)
    redis = get_redis_connection(reconnection_wrapped=True)
    not_ids = defaultdict(set)  # pks already shown, per model name
    times = []          # human-readable timing lines for the log
    search_times = []   # raw elapsed seconds
    count_documents = []
    regex = regex_ext = None

    def append_queryset_search(queryset, order_by, words, model_name):
        # Show the first 20 results of the current queryset and record
        # their pks so later field passes don't repeat them.
        # NOTE(review): this reads the closed-over `items`, not its
        # `queryset` parameter — callers happen to pass the same object,
        # so behavior is unchanged, but the parameter is effectively
        # ignored; worth confirming and cleaning up at the call sites.
        count = items.count()
        count_documents.append(count)
        for item in items.order_by(order_by)[:20]:
            append_match(item, words)
            not_ids[model_name].add(item.pk)
        return count

    now = utc_now()
    if len(search) > 1:
        # --- Free-text search via postgres full-text indexes ---
        search_escaped, words = create_search(search)
        # `regex` highlights exact words; `regex_ext` extends the match
        # to the rest of the word (prefix highlighting).
        regex = re.compile(
            r'\b(%s)' % '|'.join(
                re.escape(word)
                for word in words
                if word.lower() not in STOPWORDS
            ),
            re.I | re.U
        )
        regex_ext = re.compile(
            r'\b(%s\w*)\b' % '|'.join(
                re.escape(word)
                for word in words
                if word.lower() not in STOPWORDS
            ),
            re.I | re.U
        )
        for model in (BlogItem, BlogComment):
            qs = model.objects
            model_name = model._meta.object_name
            if model == BlogItem:
                # Only published posts; optionally narrowed by the
                # keyword filters via the Redis keyword sets.
                qs = qs.filter(pub_date__lte=now)
                fields = ('title', 'text')
                order_by = '-pub_date'
                if keyword_search.get('keyword'):
                    # use Redis!
                    ids = redis.smembers('kw:%s' % keyword_search['keyword'])
                    if ids:
                        qs = qs.filter(pk__in=ids)
                if keyword_search.get('keywords'):
                    # use Redis!
                    ids = []
                    for each in [x.strip()
                                 for x in keyword_search['keywords'].split(',')
                                 if x.strip()]:
                        ids.extend(redis.smembers('kw:%s' % each))
                    if ids:
                        qs = qs.filter(pk__in=ids)
            elif model == BlogComment:
                fields = ('comment',)
                order_by = '-add_date'
                _specials = ('keyword', 'keywords', 'category', 'categories')
                if any(keyword_search.get(k) for k in _specials):
                    # BlogComments don't have this keyword so it can
                    # never match
                    continue
            for field in fields:
                if not_ids[model_name]:
                    # Don't re-show results already found by an earlier
                    # field pass for this model.
                    qs = qs.exclude(pk__in=not_ids[model_name])
                # Raw postgres full-text predicate; `field` is one of
                # the hard-coded column names above, not user input.
                _sql = "to_tsvector('english'," + field + ") "
                if ' | ' in search_escaped or ' & ' in search_escaped:
                    _sql += "@@ to_tsquery('english', %s)"
                else:
                    _sql += "@@ plainto_tsquery('english', %s)"
                items = qs.extra(where=[_sql], params=[search_escaped])
                t0 = time.time()
                count = append_queryset_search(
                    items, order_by, words, model_name
                )
                t1 = time.time()
                times.append('%s to find %s %ss by field %s' % (
                    t1 - t0, count, model_name, field
                ))
                search_times.append(t1 - t0)
        # NOTE(review): "Searchin" is a typo in the log message; left
        # as-is here since a doc-only change must not alter runtime text.
        logger.info('Searchin for %r:\n%s' % (search, '\n'.join(times)))
    elif keyword_search and any(keyword_search.values()):
        # --- Pure keyword/category lookup (no free text) ---
        t0 = time.time()
        if keyword_search.get('keyword') or keyword_search.get('keywords'):
            if keyword_search.get('keyword'):
                ids = redis.smembers('kw:%s' % keyword_search['keyword'])
            else:
                ids = []
                for each in [x.strip()
                             for x in keyword_search.get('keywords').split(',')
                             if x.strip()]:
                    ids.extend(redis.smembers('kw:%s' % each))
            if ids:
                items = BlogItem.objects.filter(pk__in=ids)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        if keyword_search.get('category') or keyword_search.get('categories'):
            if keyword_search.get('category'):
                categories = Category.objects.filter(
                    name=keyword_search.get('category')
                )
            else:
                cats = [x.strip()
                        for x in keyword_search.get('categories').split(',')
                        if x.strip()]
                categories = Category.objects.filter(name__in=cats)
            if categories:
                cat_q = make_categories_q(categories)
                items = BlogItem.objects.filter(cat_q)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        t1 = time.time()
        search_times.append(t1 - t0)
    data['search_time'] = sum(search_times)
    count_documents_shown = len(documents)
    data['documents'] = documents
    data['count_documents'] = sum(count_documents)
    data['count_documents_shown'] = count_documents_shown
    # Suggest an OR-ed version of a 2-4 word query that found nothing.
    data['better'] = None
    if not data['count_documents']:
        _qterms = len(data['q'].split())
        if ' or ' not in data['q'] and _qterms > 1 and _qterms < 5:
            data['better'] = data['q'].replace(' ', ' or ')
    if data['better']:
        data['better_url'] = (
            reverse('search') + '?' +
            urllib.urlencode({'q': data['better'].encode('utf-8')})
        )
    if not data['q']:
        page_title = 'Search'
    elif data['count_documents'] == 1:
        page_title = '1 thing found'
    else:
        page_title = '%s things found' % data['count_documents']
    if count_documents_shown < data['count_documents']:
        if count_documents_shown == 1:
            page_title += ' (but only 1 thing shown)'
        else:
            page_title += ' (but only %s things shown)' % count_documents_shown
    data['page_title'] = page_title
    # A fruitless one-word query that happens to be a known keyword gets
    # redirected to the explicit keyword: search instead.
    if (
        not data['count_documents'] and
        len(search.split()) == 1 and
        not keyword_search
    ):
        if redis.smembers('kw:%s' % search):
            url = reverse('search')
            url += '?' + urllib.urlencode({'q': 'keyword:%s' % search})
            return redirect(url)
    return render(request, 'homepage/search.html', data)
def setUp(self):
    """Start every test with an empty Redis database."""
    super(PlogTestCase, self).setUp()
    redis = get_redis_connection()
    redis.flushdb()
    self.redis = redis
def setUp(self):
    """Give each test a freshly-flushed Redis connection."""
    connection = get_redis_connection()
    connection.flushdb()
    self.redis = connection