def handle(self, **options):
    """Reset the cache hit/miss counters and record when counting restarted."""
    connection = get_redis_connection()
    # Give every counter key a 1-second TTL rather than deleting it outright.
    for key in ('plog:hits', 'plog:misses',
                'homepage:hits', 'homepage:misses'):
        connection.expire(key, 1)
    # Remember the moment the counters were reset so stats pages can show it.
    connection.set('counters-start', str(datetime.datetime.utcnow()))
def redis_increment(prefix, request):
    """Bump the sorted-set counter for this request's "METHOD path" member."""
    path = request.get_full_path()
    # Normalize away one trailing slash, but leave the root '/' untouched.
    if path.endswith('/') and path != '/':
        path = path[:-1]
    member = '%s %s' % (request.method, iri_to_uri(path))
    get_redis_connection().zincrby(prefix, member, 1)
def stats_index(request):
    """Render cache hit/miss statistics collected in Redis.

    Aggregates the top-100 hit and miss counters for the 'plog' and
    'homepage' prefixes, computes per-URL and overall miss/hit ratios,
    and renders the table sorted by hit count (most hits first).
    """
    data = {}
    redis = get_redis_connection()
    urls = {}

    def get_totals(prefix):
        # Fold the top-100 hit and miss counters for ``prefix`` into the
        # shared ``urls`` dict and return the prefix-level totals.
        total_hits = total_misses = 0
        for uri, count in redis.zrevrange('%s:hits' % prefix, 0, 100,
                                          withscores=True):
            count = int(count)
            total_hits += count
            if uri not in urls:
                urls[uri] = {'hits': 0, 'misses': 0}
            urls[uri]['hits'] += count
        for uri, count in redis.zrevrange('%s:misses' % prefix, 0, 100,
                                          withscores=True):
            count = int(count)
            total_misses += count
            if uri not in urls:
                urls[uri] = {'hits': 0, 'misses': 0}
            urls[uri]['misses'] += count
        if total_hits:
            total_ratio = round(100.0 * total_misses / total_hits, 1)
        else:
            total_ratio = ''
        return {
            'total_hits': total_hits,
            'total_misses': total_misses,
            'total_ratio': total_ratio,
        }

    data['plog'] = get_totals('plog')
    data['homepage'] = get_totals('homepage')
    total_hits = total_misses = 0
    for v in urls.values():
        total_hits += v['hits']
        total_misses += v['misses']
        if v['hits']:
            v['ratio'] = '%.1f%%' % (100.0 * v['misses'] / v['hits'])
        else:
            v['ratio'] = '--'

    def make_abs_url(url):
        # Only plain GET entries can be turned into a clickable URL.
        if url.startswith('GET '):
            return url[4:]
        return None

    urls = [(make_abs_url(x), x, y['hits'], y['misses'], y['ratio'])
            for x, y in urls.items()]
    # BUGFIX: sort by the hit count (index 2), not index 1.  The old
    # ``cmp``-based sort compared index 1, which became the raw
    # "METHOD path" string once the absolute URL was prepended to each
    # tuple; ``cmp`` is also Python-2-only.
    urls.sort(key=lambda row: row[2], reverse=True)
    data['urls'] = urls
    data['start_date'] = redis.get('counters-start')
    data['total_hits'] = total_hits
    data['total_misses'] = total_misses
    return render(request, 'stats/index.html', data)
def stats_index(request):
    """Render cache hit/miss statistics collected in Redis.

    Aggregates the top-100 hit and miss counters for the 'plog' and
    'homepage' prefixes, computes per-URL and overall miss/hit ratios,
    and renders the table sorted by hit count (most hits first).
    """
    data = {}
    redis = get_redis_connection()
    urls = {}

    def get_totals(prefix):
        # Fold the top-100 hit and miss counters for ``prefix`` into the
        # shared ``urls`` dict and return the prefix-level totals.
        total_hits = total_misses = 0
        for uri, count in redis.zrevrange('%s:hits' % prefix, 0, 100,
                                          withscores=True):
            count = int(count)
            total_hits += count
            if uri not in urls:
                urls[uri] = {'hits': 0, 'misses': 0}
            urls[uri]['hits'] += count
        for uri, count in redis.zrevrange('%s:misses' % prefix, 0, 100,
                                          withscores=True):
            count = int(count)
            total_misses += count
            if uri not in urls:
                urls[uri] = {'hits': 0, 'misses': 0}
            urls[uri]['misses'] += count
        if total_hits:
            total_ratio = round(100.0 * total_misses / total_hits, 1)
        else:
            total_ratio = ''
        return {
            'total_hits': total_hits,
            'total_misses': total_misses,
            'total_ratio': total_ratio,
        }

    data['plog'] = get_totals('plog')
    data['homepage'] = get_totals('homepage')
    for v in urls.values():
        if v['hits']:
            v['ratio'] = '%.1f%%' % (100.0 * v['misses'] / v['hits'])
        else:
            v['ratio'] = '--'
    urls = [(x, y['hits'], y['misses'], y['ratio']) for x, y in urls.items()]
    # Sort by hit count, descending.  (Replaces the Python-2-only
    # ``cmp``-comparator sort with the equivalent key-based sort, which
    # produces the same ordering and also works on Python 3.)
    urls.sort(key=lambda row: row[1], reverse=True)
    data['urls'] = urls
    data['start_date'] = redis.get('counters-start')
    return render(request, 'stats/index.html', data)
def add_post(request):
    """Staff-only view for creating a new blog post."""
    user = request.user
    # NOTE(review): ``assert`` is stripped under ``python -O``; confirm
    # this view is also access-protected elsewhere.
    assert user.is_staff or user.is_superuser
    if request.method == 'POST':
        form = BlogForm(data=request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            # One keyword per non-blank line of the textarea.
            keywords = [line.strip()
                        for line in cleaned['keywords'].splitlines()
                        if line.strip()]
            blogitem = BlogItem.objects.create(
                oid=cleaned['oid'],
                title=cleaned['title'],
                text=cleaned['text'],
                summary=cleaned['summary'],
                display_format=cleaned['display_format'],
                codesyntax=cleaned['codesyntax'],
                url=cleaned['url'],
                pub_date=cleaned['pub_date'],
                keywords=keywords,
            )
            for category in cleaned['categories']:
                blogitem.categories.add(category)
            blogitem.save()
            redis = get_redis_connection(reconnection_wrapped=True)
            # Register the post under every keyword that is new to the
            # Redis index and keep the global keyword count in sync.
            for keyword in keywords:
                if not redis.smembers('kw:%s' % keyword):
                    redis.sadd('kw:%s' % keyword, blogitem.pk)
                    redis.incr('kwcount')
            return redirect(reverse('edit_post', args=[blogitem.oid]))
    else:
        form = BlogForm(initial={
            'pub_date': utc_now() + datetime.timedelta(seconds=60 * 60),
            'display_format': 'markdown',
        })
    data = {
        'form': form,
        'page_title': 'Add post',
        'blogitem': None,
    }
    return render(request, 'plog/edit.html', data)
def add_post(request):
    """Create a new blog post (staff/superusers only)."""
    # NOTE(review): access control via ``assert`` disappears under -O;
    # confirm this view is otherwise protected.
    assert request.user.is_staff or request.user.is_superuser
    if request.method == 'POST':
        form = BlogForm(data=request.POST)
        if form.is_valid():
            keywords = []
            for line in form.cleaned_data['keywords'].splitlines():
                line = line.strip()
                if line:
                    keywords.append(line)
            blogitem = BlogItem.objects.create(
                oid=form.cleaned_data['oid'],
                title=form.cleaned_data['title'],
                text=form.cleaned_data['text'],
                summary=form.cleaned_data['summary'],
                display_format=form.cleaned_data['display_format'],
                codesyntax=form.cleaned_data['codesyntax'],
                url=form.cleaned_data['url'],
                pub_date=form.cleaned_data['pub_date'],
                keywords=keywords,
            )
            for category in form.cleaned_data['categories']:
                blogitem.categories.add(category)
            blogitem.save()
            redis = get_redis_connection(reconnection_wrapped=True)
            # Index the post under every keyword not yet known to Redis.
            for keyword in keywords:
                if not redis.smembers('kw:%s' % keyword):
                    redis.sadd('kw:%s' % keyword, blogitem.pk)
                    redis.incr('kwcount')
            return redirect(reverse('edit_post', args=[blogitem.oid]))
    else:
        one_hour = datetime.timedelta(seconds=60 * 60)
        form = BlogForm(initial={
            'pub_date': utc_now() + one_hour,
            'display_format': 'markdown',
        })
    data = {'form': form, 'page_title': 'Add post', 'blogitem': None}
    return render(request, 'plog/edit.html', data)
def _get_related_pks(post, max_):
    """Return up to ``max_`` pks of posts sharing keywords with ``post``.

    Keywords listed earlier on ``post`` contribute a larger score, so
    the best-matching posts come first.
    """
    redis = get_redis_connection(reconnection_wrapped=True)
    # Lazily (re)build the keyword index if it has never been populated.
    if not redis.get('kwcount'):
        for published in BlogItem.objects.filter(pub_date__lt=utc_now()):
            for keyword in published.keywords:
                redis.sadd('kw:%s' % keyword, published.pk)
                redis.incr('kwcount')
    keywords = post.keywords
    weight = len(keywords)
    scores = defaultdict(int)
    for position, keyword in enumerate(keywords):
        for member in redis.smembers('kw:%s' % keyword):
            pk = int(member)
            if pk == post.pk:
                continue
            # Earlier keywords count for more.
            scores[pk] += weight - position
    ranked = sorted(((score, pk) for (pk, score) in scores.items()),
                    reverse=True)
    return [pk for (score, pk) in ranked][:max_]
def edit_post(request, oid):
    """Edit the existing blog post identified by ``oid`` (staff only)."""
    blogitem = get_object_or_404(BlogItem, oid=oid)
    user = request.user
    # NOTE(review): assert-based access control vanishes under -O;
    # confirm this view is protected elsewhere too.
    assert user.is_staff or user.is_superuser
    if request.method == 'POST':
        form = BlogForm(instance=blogitem, data=request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            for field in ('oid', 'title', 'text', 'summary',
                          'display_format', 'codesyntax', 'pub_date'):
                setattr(blogitem, field, cleaned[field])
            # Clear the cached rendering so it gets regenerated.
            blogitem.text_rendered = ''
            keywords = [line.strip()
                        for line in cleaned['keywords'].splitlines()
                        if line.strip()]
            blogitem.keywords = keywords
            blogitem.categories.clear()
            for category in cleaned['categories']:
                blogitem.categories.add(category)
            blogitem.save()
            redis = get_redis_connection(reconnection_wrapped=True)
            # Register the post under any keyword new to the Redis index.
            for keyword in keywords:
                if not redis.smembers('kw:%s' % keyword):
                    redis.sadd('kw:%s' % keyword, blogitem.pk)
                    redis.incr('kwcount')
            return redirect(reverse('edit_post', args=[blogitem.oid]))
    else:
        form = BlogForm(instance=blogitem)
    data = {
        'form': form,
        'page_title': 'Edit post',
        'blogitem': blogitem,
        'INBOUND_EMAIL_ADDRESS': settings.INBOUND_EMAIL_ADDRESS,
    }
    return render(request, 'plog/edit.html', data)
def _get_related_pks(post, max_):
    """Pks of other posts sharing keywords with ``post``, best first.

    At most ``max_`` pks are returned; keywords appearing earlier on
    ``post`` weigh more in the ranking.
    """
    redis = get_redis_connection(reconnection_wrapped=True)
    if not redis.get('kwcount'):
        # Keyword index never built -- populate it from published posts.
        for published in BlogItem.objects.filter(pub_date__lt=utc_now()):
            for kw in published.keywords:
                redis.sadd('kw:%s' % kw, published.pk)
                redis.incr('kwcount')
    own_keywords = post.keywords
    total = len(own_keywords)
    score_by_pk = defaultdict(int)
    for index, kw in enumerate(own_keywords):
        for raw_pk in redis.smembers('kw:%s' % kw):
            candidate = int(raw_pk)
            if candidate != post.pk:
                score_by_pk[candidate] += total - index
    ordered = sorted(score_by_pk.items(),
                     key=lambda pair: (pair[1], pair[0]),
                     reverse=True)
    return [pk for (pk, score) in ordered][:max_]
def edit_post(request, oid):
    """Staff-only view for updating the blog post identified by ``oid``."""
    post = get_object_or_404(BlogItem, oid=oid)
    # NOTE(review): ``assert`` is stripped under -O; confirm other
    # access control exists for this view.
    assert request.user.is_staff or request.user.is_superuser
    if request.method == 'POST':
        form = BlogForm(instance=post, data=request.POST)
        if form.is_valid():
            post.oid = form.cleaned_data['oid']
            post.title = form.cleaned_data['title']
            post.text = form.cleaned_data['text']
            post.text_rendered = ''  # force the cached rendering to rebuild
            post.summary = form.cleaned_data['summary']
            post.display_format = form.cleaned_data['display_format']
            post.codesyntax = form.cleaned_data['codesyntax']
            post.pub_date = form.cleaned_data['pub_date']
            keywords = []
            for raw_line in form.cleaned_data['keywords'].splitlines():
                raw_line = raw_line.strip()
                if raw_line:
                    keywords.append(raw_line)
            post.keywords = keywords
            post.categories.clear()
            for category in form.cleaned_data['categories']:
                post.categories.add(category)
            post.save()
            redis = get_redis_connection(reconnection_wrapped=True)
            # Add the post to the Redis set of each brand-new keyword.
            for keyword in keywords:
                if not redis.smembers('kw:%s' % keyword):
                    redis.sadd('kw:%s' % keyword, post.pk)
                    redis.incr('kwcount')
            return redirect(reverse('edit_post', args=[post.oid]))
    else:
        form = BlogForm(instance=post)
    data = {}
    data['form'] = form
    data['page_title'] = 'Edit post'
    data['blogitem'] = post
    data['INBOUND_EMAIL_ADDRESS'] = settings.INBOUND_EMAIL_ADDRESS
    return render(request, 'plog/edit.html', data)
def search(request):
    """Full-text search across blog posts and comments.

    Supports ``keyword:``/``category:`` style filters (split out by
    ``split_search``), a small ``or``/``and`` query language, and
    combines PostgreSQL full-text search with a Redis keyword index.
    """
    data = {}
    search = request.GET.get('q', '')
    if len(search) > 100:
        return http.HttpResponse("Search too long")
    documents = []
    data['base_url'] = 'http://%s' % RequestSite(request).domain
    tag_strip = re.compile('<[^>]+>')

    def append_match(item, words):
        # Build a highlighted summary for one matching item and append
        # it to ``documents``.
        text = item.rendered
        text = tag_strip.sub(' ', text)
        sentences = []

        def matcher(match):
            return '<b>%s</b>' % match.group()

        if regex:
            for each in regex.finditer(text):
                # Take a ~75-char window around the match and bold the
                # matched words inside it.
                sentence = text[max(each.start() - 35, 0):each.end() + 40]
                sentence = regex_ext.sub(matcher, sentence)
                sentence = sentence.strip()
                if each.start() > 0 and not sentence[0].isupper():
                    sentence = '...%s' % sentence
                if each.end() < len(text):
                    sentence = '%s...' % sentence
                sentences.append(sentence.strip())
                if len(sentences) > 3:
                    break
        if isinstance(item, BlogItem):
            title = html_escape(item.title)
            if regex_ext:
                title = regex_ext.sub(matcher, title)
            date = item.pub_date
            type_ = 'blog'
        else:
            if not item.blogitem:
                item.correct_blogitem_parent()
            title = ("Comment on <em>%s</em>"
                     % html_escape(item.blogitem.title))
            date = item.add_date
            type_ = 'comment'
        documents.append({
            'title': title,
            'summary': '<br>'.join(sentences),
            'date': date,
            'url': item.get_absolute_url(),
            'type': type_,
        })

    def create_search(s):
        # Translate the query into a tsquery expression ('a & b' or
        # 'a | b') and also return the plain word list for highlighting.
        words = re.findall(r'\w+', s)  # raw string (was '\w+')
        words_orig = words[:]
        if 'or' in words:
            which = words.index('or')
            words_orig.remove('or')
            if (which + 1) < len(words) and which > 0:
                before = words.pop(which - 1)
                words.pop(which - 1)
                after = words.pop(which - 1)
                words.insert(which - 1, '%s | %s' % (before, after))
        while 'and' in words_orig:
            words_orig.remove('and')
        while 'and' in words:
            words.remove('and')
        escaped = ' & '.join(words)
        return escaped, words_orig

    data['q'] = search
    keyword_search = {}
    if len(search) > 1:
        _keyword_keys = ('keyword', 'keywords', 'category', 'categories')
        search, keyword_search = split_search(search, _keyword_keys)
    redis = get_redis_connection(reconnection_wrapped=True)
    not_ids = defaultdict(set)
    times = []
    count_documents = []
    regex = regex_ext = None

    def append_queryset_search(queryset, order_by, words, model_name):
        # BUGFIX: this helper previously read the closure variable
        # ``items`` instead of its ``queryset`` parameter; it only
        # worked because every caller happened to pass that very
        # variable.  Use the parameter.
        count = queryset.count()
        count_documents.append(count)
        for item in queryset.order_by(order_by)[:20]:
            append_match(item, words)
            not_ids[model_name].add(item.pk)
        return count

    if len(search) > 1:
        search_escaped, words = create_search(search)
        regex = re.compile(
            r'\b(%s)' % '|'.join(re.escape(word) for word in words
                                 if word.lower() not in STOPWORDS),
            re.I | re.U)
        regex_ext = re.compile(
            r'\b(%s\w*)\b' % '|'.join(re.escape(word) for word in words
                                      if word.lower() not in STOPWORDS),
            re.I | re.U)
        for model in (BlogItem, BlogComment):
            qs = model.objects
            model_name = model._meta.object_name
            if model == BlogItem:
                fields = ('title', 'text')
                order_by = '-pub_date'
                if keyword_search.get('keyword'):
                    # use Redis!
                    ids = redis.smembers('kw:%s' % keyword_search['keyword'])
                    if ids:
                        qs = qs.filter(pk__in=ids)
                if keyword_search.get('keywords'):
                    # use Redis!
                    ids = []
                    for each in [x.strip()
                                 for x in keyword_search['keywords'].split(',')
                                 if x.strip()]:
                        ids.extend(redis.smembers('kw:%s' % each))
                    if ids:
                        qs = qs.filter(pk__in=ids)
            elif model == BlogComment:
                fields = ('comment',)
                order_by = '-add_date'
                if any(keyword_search.get(k)
                       for k in ('keyword', 'keywords',
                                 'category', 'categories')):
                    # BlogComments don't have keywords/categories so
                    # they can never match such a filter.
                    continue
            for field in fields:
                if not_ids[model_name]:
                    # Don't list the same item twice when its second
                    # field also matches.
                    qs = qs.exclude(pk__in=not_ids[model_name])
                _sql = "to_tsvector('english'," + field + ") "
                if ' | ' in search_escaped or ' & ' in search_escaped:
                    _sql += "@@ to_tsquery('english', %s)"
                else:
                    _sql += "@@ plainto_tsquery('english', %s)"
                items = qs.extra(where=[_sql], params=[search_escaped])
                t0 = time.time()
                count = append_queryset_search(items, order_by, words,
                                               model_name)
                t1 = time.time()
                times.append('%s to find %s %ss by field %s' % (
                    t1 - t0, count, model_name, field))
        # Typo fixed ("Searchin"); lazy %-args avoid formatting when
        # the log level filters the record out.
        logging.info('Searching for %r:\n%s', search, '\n'.join(times))
    elif keyword_search and any(keyword_search.values()):
        # Pure keyword/category search (no free-text query part).
        if keyword_search.get('keyword') or keyword_search.get('keywords'):
            if keyword_search.get('keyword'):
                ids = redis.smembers('kw:%s' % keyword_search['keyword'])
            else:
                ids = []
                for each in [x.strip()
                             for x in keyword_search.get('keywords').split(',')
                             if x.strip()]:
                    ids.extend(redis.smembers('kw:%s' % each))
            if ids:
                items = BlogItem.objects.filter(pk__in=ids)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        if keyword_search.get('category') or keyword_search.get('categories'):
            if keyword_search.get('category'):
                categories = Category.objects.filter(
                    name=keyword_search.get('category'))
            else:
                cats = [x.strip()
                        for x in keyword_search.get('categories').split(',')
                        if x.strip()]
                categories = Category.objects.filter(name__in=cats)
            if categories:
                cat_q = make_categories_q(categories)
                items = BlogItem.objects.filter(cat_q)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
    count_documents_shown = len(documents)
    data['documents'] = documents
    data['count_documents'] = sum(count_documents)
    data['count_documents_shown'] = count_documents_shown
    data['better'] = None
    if not data['count_documents']:
        # Suggest an OR-search when the AND-search found nothing.
        if ' or ' not in data['q'] and len(data['q'].split()) > 1:
            data['better'] = data['q'].replace(' ', ' or ')
    if data['better']:
        data['better_url'] = (reverse('search') + '?' +
                              urllib.urlencode(
                                  {'q': data['better'].encode('utf-8')}))
    if data['count_documents'] == 1:
        page_title = '1 thing found'
    else:
        page_title = '%s things found' % data['count_documents']
    if count_documents_shown < data['count_documents']:
        if count_documents_shown == 1:
            page_title += ' (but only 1 thing shown)'
        else:
            page_title += ' (but only %s things shown)' % count_documents_shown
    data['page_title'] = page_title
    return render(request, 'homepage/search.html', data)
def setUp(self):
    """Start every test from an empty Redis database."""
    get_redis_connection().flushdb()
def redis_increment(prefix, request):
    """Increment the "<METHOD> <path>" member of the ``prefix`` sorted set."""
    member = '%s %s' % (request.method, iri_to_uri(request.get_full_path()))
    get_redis_connection().zincrby(prefix, member, 1)
def setUp(self):
    """Create the shared Redis connection and wipe it before each test."""
    connection = get_redis_connection()
    connection.flushdb()
    # Keep the connection around for the individual tests to use.
    self.redis = connection
def search(request):
    """Full-text search across blog posts and comments.

    Supports ``keyword:``/``category:`` style filters (split out by
    ``split_search``), a small ``or``/``and`` query language, and
    combines PostgreSQL full-text search with a Redis keyword index.
    Redirects to a keyword search when a single-word query only
    matches the keyword index.
    """
    data = {}
    search = request.GET.get('q', '')
    if len(search) > 100:
        return http.HttpResponse("Search too long")
    documents = []
    data['base_url'] = 'http://%s' % RequestSite(request).domain
    tag_strip = re.compile('<[^>]+>')

    def append_match(item, words):
        # Build a highlighted summary for one matching item and append
        # it to ``documents``.
        text = item.rendered
        text = tag_strip.sub(' ', text)
        sentences = []

        def matcher(match):
            return '<b>%s</b>' % match.group()

        if regex:
            for each in regex.finditer(text):
                # Take a ~75-char window around the match and bold the
                # matched words inside it.
                sentence = text[max(each.start() - 35, 0):each.end() + 40]
                sentence = regex_ext.sub(matcher, sentence)
                sentence = sentence.strip()
                if each.start() > 0 and not sentence[0].isupper():
                    sentence = '...%s' % sentence
                if each.end() < len(text):
                    sentence = '%s...' % sentence
                sentences.append(sentence.strip())
                if len(sentences) > 3:
                    break
        if isinstance(item, BlogItem):
            title = html_escape(item.title)
            if regex_ext:
                title = regex_ext.sub(matcher, title)
            date = item.pub_date
            type_ = 'blog'
        else:
            if not item.blogitem:
                item.correct_blogitem_parent()
            title = ("Comment on <em>%s</em>"
                     % html_escape(item.blogitem.title))
            date = item.add_date
            type_ = 'comment'
        documents.append({
            'title': title,
            'summary': '<br>'.join(sentences),
            'date': date,
            'url': item.get_absolute_url(),
            'type': type_,
        })

    def create_search(s):
        # Translate the query into a tsquery expression ('a & b' or
        # 'a | b') and also return the plain word list for highlighting.
        words = re.findall(r'\w+', s)  # raw string (was '\w+')
        words_orig = words[:]
        if 'or' in words:
            which = words.index('or')
            words_orig.remove('or')
            if (which + 1) < len(words) and which > 0:
                before = words.pop(which - 1)
                words.pop(which - 1)
                after = words.pop(which - 1)
                words.insert(which - 1, '%s | %s' % (before, after))
        while 'and' in words_orig:
            words_orig.remove('and')
        while 'and' in words:
            words.remove('and')
        escaped = ' & '.join(words)
        return escaped, words_orig

    data['q'] = search
    keyword_search = {}
    if len(search) > 1:
        _keyword_keys = ('keyword', 'keywords', 'category', 'categories')
        search, keyword_search = split_search(search, _keyword_keys)
    redis = get_redis_connection(reconnection_wrapped=True)
    not_ids = defaultdict(set)
    times = []
    count_documents = []
    regex = regex_ext = None

    def append_queryset_search(queryset, order_by, words, model_name):
        # BUGFIX: this helper previously read the closure variable
        # ``items`` instead of its ``queryset`` parameter; it only
        # worked because every caller happened to pass that very
        # variable.  Use the parameter.
        count = queryset.count()
        count_documents.append(count)
        for item in queryset.order_by(order_by)[:20]:
            append_match(item, words)
            not_ids[model_name].add(item.pk)
        return count

    if len(search) > 1:
        search_escaped, words = create_search(search)
        regex = re.compile(
            r'\b(%s)' % '|'.join(
                re.escape(word) for word in words
                if word.lower() not in STOPWORDS), re.I | re.U)
        regex_ext = re.compile(
            r'\b(%s\w*)\b' % '|'.join(
                re.escape(word) for word in words
                if word.lower() not in STOPWORDS), re.I | re.U)
        for model in (BlogItem, BlogComment):
            qs = model.objects
            model_name = model._meta.object_name
            if model == BlogItem:
                fields = ('title', 'text')
                order_by = '-pub_date'
                if keyword_search.get('keyword'):
                    # use Redis!
                    ids = redis.smembers('kw:%s' % keyword_search['keyword'])
                    if ids:
                        qs = qs.filter(pk__in=ids)
                if keyword_search.get('keywords'):
                    # use Redis!
                    ids = []
                    for each in [
                            x.strip()
                            for x in keyword_search['keywords'].split(',')
                            if x.strip()]:
                        ids.extend(redis.smembers('kw:%s' % each))
                    if ids:
                        qs = qs.filter(pk__in=ids)
            elif model == BlogComment:
                fields = ('comment', )
                order_by = '-add_date'
                if any(keyword_search.get(k)
                       for k in ('keyword', 'keywords',
                                 'category', 'categories')):
                    # BlogComments don't have keywords/categories so
                    # they can never match such a filter.
                    continue
            for field in fields:
                if not_ids[model_name]:
                    # Don't list the same item twice when its second
                    # field also matches.
                    qs = qs.exclude(pk__in=not_ids[model_name])
                _sql = "to_tsvector('english'," + field + ") "
                if ' | ' in search_escaped or ' & ' in search_escaped:
                    _sql += "@@ to_tsquery('english', %s)"
                else:
                    _sql += "@@ plainto_tsquery('english', %s)"
                items = qs.extra(where=[_sql], params=[search_escaped])
                t0 = time.time()
                count = append_queryset_search(items, order_by, words,
                                               model_name)
                t1 = time.time()
                times.append('%s to find %s %ss by field %s' %
                             (t1 - t0, count, model_name, field))
        # Typo fixed ("Searchin"); lazy %-args avoid formatting when
        # the log level filters the record out.
        logging.info('Searching for %r:\n%s', search, '\n'.join(times))
    elif keyword_search and any(keyword_search.values()):
        # Pure keyword/category search (no free-text query part).
        if keyword_search.get('keyword') or keyword_search.get('keywords'):
            if keyword_search.get('keyword'):
                ids = redis.smembers('kw:%s' % keyword_search['keyword'])
            else:
                ids = []
                for each in [
                        x.strip()
                        for x in keyword_search.get('keywords').split(',')
                        if x.strip()]:
                    ids.extend(redis.smembers('kw:%s' % each))
            if ids:
                items = BlogItem.objects.filter(pk__in=ids)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        if keyword_search.get('category') or keyword_search.get('categories'):
            if keyword_search.get('category'):
                categories = Category.objects.filter(
                    name=keyword_search.get('category'))
            else:
                cats = [
                    x.strip()
                    for x in keyword_search.get('categories').split(',')
                    if x.strip()]
                categories = Category.objects.filter(name__in=cats)
            if categories:
                cat_q = make_categories_q(categories)
                items = BlogItem.objects.filter(cat_q)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
    count_documents_shown = len(documents)
    data['documents'] = documents
    data['count_documents'] = sum(count_documents)
    data['count_documents_shown'] = count_documents_shown
    data['better'] = None
    if not data['count_documents']:
        # Suggest an OR-search when the AND-search found nothing.
        if ' or ' not in data['q'] and len(data['q'].split()) > 1:
            data['better'] = data['q'].replace(' ', ' or ')
    if data['better']:
        data['better_url'] = (
            reverse('search') + '?'
            + urllib.urlencode({'q': data['better'].encode('utf-8')}))
    if not data['q']:
        page_title = ''
    elif data['count_documents'] == 1:
        page_title = '1 thing found'
    else:
        page_title = '%s things found' % data['count_documents']
    if count_documents_shown < data['count_documents']:
        if count_documents_shown == 1:
            page_title += ' (but only 1 thing shown)'
        else:
            page_title += ' (but only %s things shown)' % count_documents_shown
    data['page_title'] = page_title
    if data['documents']:
        data['first_document_url'] = data['documents'][0]['url']
    else:
        data['first_document_url'] = None
    # A single word that found nothing but exists in the keyword index:
    # redirect to the equivalent keyword search.
    if not data['count_documents'] and len(
            search.split()) == 1 and not keyword_search:
        if redis.smembers('kw:%s' % search):
            url = reverse('search')
            url += '?' + urllib.urlencode({'q': 'keyword:%s' % search})
            return redirect(url)
    return render(request, 'homepage/search.html', data)