def test_blog_post_caching(self):
    """Render a blog post, then verify the cached page is served until a
    new approved comment invalidates it, and that hit/miss counters are
    bumped in redis."""
    blog = BlogItem.objects.create(
        oid='some-longish-test-post',
        title='TITLEX',
        text='BLABLABLA',
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=10),
    )
    url = reverse('blog_post', args=[blog.oid])
    # Monkey-patch the view module's `render` so we can count how many
    # times a full template render actually happens.
    import peterbecom.apps.plog.views
    old_render = peterbecom.apps.plog.views.render
    from django.shortcuts import render as django_render
    render_counts = []

    def mocked_render(*a, **k):
        render_counts.append(1)
        return django_render(*a, **k)

    peterbecom.apps.plog.views.render = mocked_render
    try:
        response = self.client.get(url)
        self.assertTrue(blog.title in response.content)
        assert '0 comments' in response.content
        # Second GET should come from cache (no extra render).
        response = self.client.get(url)
        assert '0 comments' in response.content
        BlogComment.objects.create(
            comment="textext",
            blogitem=blog,
            approved=True,
            # Slightly in the future so it's newer than the cached page.
            add_date=utc_now() + datetime.timedelta(seconds=1),
        )
        response = self.client.get(url)
        assert '1 comment' in response.content
    finally:
        # Always restore the real render, even if assertions fail.
        peterbecom.apps.plog.views.render = old_render
    # One render for the cold cache, one after the comment invalidated it.
    assert len(render_counts) == 2, render_counts
    # NOTE(review): assertTrue() here receives the expected list as its
    # *msg* argument, so these only assert that zrange() returned
    # something truthy — presumably assertEqual was intended; confirm
    # the expected scores before tightening.
    self.assertTrue(
        self.redis.zrange('plog:hits', 0, -1, withscores=True),
        [('/plog/some-longish-test-post', 5.0)]
    )
    self.assertTrue(
        self.redis.zrange('plog:misses', 0, -1, withscores=True),
        [('/plog/some-longish-test-post', 1.0)]
    )
def handle(self, *args, **options):
    """Push published blog post titles to the autocompeter.com bulk API
    so they become available for title autocompletion, weighted by hits."""
    now = utc_now()
    verbose = int(options['verbosity']) > 1
    base_url = 'http://%s' % Site.objects.all()[0].domain
    # Random order so repeated partial (non --all) runs don't always
    # re-send the same subset.
    qs = models.BlogItem.objects.filter(pub_date__lte=now).order_by('?')
    if not options['all']:
        qs = qs[:options['max']]
    documents = []
    for plog in qs:
        if verbose:
            print repr(plog.title),
        try:
            hits = models.BlogItemHits.objects.get(oid=plog.oid).hits
        except models.BlogItemHits.DoesNotExist:
            # No recorded hits yet; weight of 1 keeps the post findable.
            hits = 1
        data = {
            'title': plog.title,
            'url': base_url + reverse('blog_post', args=(plog.oid,)),
            'popularity': hits,
        }
        documents.append(data)
    # One bulk upload for the whole batch.
    response = requests.post(
        'https://autocompeter.com/v1/bulk',
        data=json.dumps({'documents': documents}),
        headers={'Auth-Key': settings.AUTOCOMPETER_AUTH_KEY}
    )
    if verbose:
        pprint(documents)
        print response
def handle(self, *args, **options):
    """Recompute the `plogrank` score for every blog post that has at
    least one recorded hit."""
    now = utc_now()
    verbose = int(options['verbosity']) > 1
    qs = BlogItemHits.objects.filter(hits__gt=0)
    for hit in qs.values('oid', 'hits'):
        # This is totally arbitrary!
        # I'm using hits and number of comments as a measure of
        # how it should be ranked.
        # The thinking is that posts that are found and read are
        # likely to be more popular and should thus be ranked
        # higher.
        plogrank = hit['hits']
        comments = (
            BlogComment.objects
            .filter(blogitem__oid=hit['oid']).count()
        )
        # multiply by a factor to make this slightly more significant
        plogrank += comments * 10
        (
            BlogItem.objects
            .filter(oid=hit['oid'])
            .update(plogrank=plogrank)
        )
        if verbose:
            print str(plogrank).rjust(7), '\t', hit['oid']
def test_postmark_inbound(self):
    """Posting a raw Postmark payload without a hashkey in the subject
    is rejected; with a valid hashkey it attaches a BlogFile to the post."""
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'raw_data.1333828973.78.json'
    )
    with open(fixture_path) as fixture:
        payload = fixture.read()
    endpoint = reverse('inbound_email')

    # No hashkey in the subject: still a 200, but an error body.
    response = self.client.post(
        endpoint, data=payload, content_type="application/json"
    )
    self.assertEqual(response.status_code, 200)
    lowered = response.content.lower()
    self.assertTrue("error" in lowered)
    self.assertTrue("no hashkey defined in subject line" in lowered)

    post = BlogItem.objects.create(
        oid='some-longish-test-post',
        title='TITLEX',
        text='BLABLABLA',
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=10),
    )
    # A valid hashkey in the subject routes the email to this post.
    hashkey = post.get_or_create_inbound_hashkey()
    payload = payload.replace('Test subject', '%s: Test Title' % hashkey)
    response = self.client.post(
        endpoint, data=payload, content_type="application/json"
    )
    self.assertEqual(response.status_code, 200)
    self.assertTrue("OK" in response.content)
    self.assertTrue(BlogFile.objects.filter(blogitem=post))
    attachment, = BlogFile.objects.filter(blogitem=post)
    self.assertEqual(attachment.title, 'Test Title')
    self.assertTrue(attachment.file.read())
def test_text_rendering_with_images(self):
    """Structuredtext rendering rewrites image URLs through the content
    cache, and honours STATIC_URL (e.g. a CDN prefix) when set.

    Fix: the regex patterns are now raw strings so `\\d` is a regex
    digit class and not a (deprecated) Python string escape.
    """
    blog = BlogItem.objects.create(
        oid='myoid',
        title='TITLEX',
        text="""
        "image.png":/plog/myoid/image.png
        and *this*
        """,
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=10),
    )
    url = reverse('blog_post', args=[blog.oid])
    response = self.client.get(url)
    # Only look inside the post body of the rendered page.
    content = response.content.split('id="post"')[1].split('</section')[0]
    self.assertTrue('<em>this</em>' in content)
    regex_str = (r'/CONTENTCACHE-\d+%s'
                 % (re.escape('/plog/myoid/image.png'),))
    self.assertTrue(re.findall(regex_str, content))
    old = settings.STATIC_URL
    settings.STATIC_URL = '//some.cdn.com/'
    try:
        # Blank out the cached rendering to force a re-render that
        # picks up the new STATIC_URL.
        blog.text_rendered = ''
        blog.save()
        response = self.client.get(url)
        content = response.content.split('id="post"')[1].split('</section')[0]
        regex_str = (r'%sCONTENTCACHE-\d+%s'
                     % (settings.STATIC_URL,
                        re.escape('/plog/myoid/image.png')))
        self.assertTrue(re.findall(regex_str, content))
    finally:
        # Always restore the global setting for other tests.
        settings.STATIC_URL = old
def home_rest(request, from_index, to_index):
    """Render the homepage post list for the slice [from_index, to_index).

    Used to lazy-load the posts below the fold. The indexes arrive as
    URL captures (strings), so coerce them to int; invalid values are a
    client error (404), matching how `home` treats a bad ?page value.
    """
    try:
        from_index = int(from_index)
        to_index = int(to_index)
    except (TypeError, ValueError):
        raise http.Http404('invalid index')
    qs = (
        BlogItem.objects.filter(pub_date__lt=utc_now())
        .order_by('-pub_date')
    )
    context = {
        'blogitems': qs[from_index:to_index]
    }
    return render(request, 'homepage/_posts.html', context)
def sitemap(request):
    """Generate the XML sitemap: homepage, about, contact, and up to
    1000 of the most recent published blog posts."""
    base_url = 'http://%s' % RequestSite(request).domain
    urls = []
    urls.append('<?xml version="1.0" encoding="iso-8859-1"?>')
    urls.append('<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">')

    def add(loc, lastmod=None, changefreq='monthly', priority=None):
        # Append one <url> entry; optional fields are emitted only
        # when given.
        url = '<url><loc>%s%s</loc>' % (base_url, loc)
        if lastmod:
            url += '<lastmod>%s</lastmod>' % lastmod.strftime('%Y-%m-%d')
        if priority:
            url += '<priority>%s</priority>' % priority
        if changefreq:
            url += '<changefreq>%s</changefreq>' % changefreq
        url += '</url>'
        urls.append(url)

    now = utc_now()
    latest_blogitem, = (BlogItem.objects
                        .filter(pub_date__lt=now)
                        .order_by('-pub_date')[:1])
    add('/',
        priority=1.0,
        changefreq='daily',
        lastmod=latest_blogitem.pub_date)
    add(reverse('about'), changefreq='weekly', priority=0.5)
    add(reverse('contact'), changefreq='weekly', priority=0.5)
    for blogitem in (BlogItem.objects
                     .filter(pub_date__lt=now)
                     .order_by('-pub_date')[:1000]):
        if not blogitem.modify_date:
            # legacy! Backfill modify_date from the latest approved
            # comment, falling back to the pub date when the unpacking
            # fails because there are no comments.
            try:
                latest_comment, = (BlogComment.objects
                                   .filter(approved=True, blogitem=blogitem)
                                   .order_by('-add_date')[:1])
                blogitem.modify_date = latest_comment.add_date
            except ValueError:
                blogitem.modify_date = blogitem.pub_date
            blogitem._modify_date_set = True
            blogitem.save()
        # Recently touched posts get a more aggressive crawl hint.
        age = (now - blogitem.modify_date).days
        if age < 14:
            changefreq = 'daily'
        elif age < 60:
            changefreq = 'weekly'
        elif age < 100:
            changefreq = 'monthly'
        else:
            changefreq = None
        add(reverse('blog_post', args=[blogitem.oid]),
            lastmod=blogitem.modify_date,
            changefreq=changefreq
            )
    urls.append('</urlset>')
    return http.HttpResponse('\n'.join(urls), mimetype="text/xml")
def get_data(max_length=1000, pub_date_format=None, offset=0):
    """Serialize recent published blog posts into plain dicts.

    `pub_date_format`, when given, is applied to each post's pub_date.
    The queryset slice is [offset:max_length].
    """
    recent = (
        BlogItem.objects
        .filter(pub_date__lt=utc_now())
        .order_by('-pub_date')
    )[offset:max_length]
    items = []
    for post in recent:
        when = post.pub_date
        if pub_date_format:
            when = pub_date_format(when)
        items.append({
            'title': post.title,
            'slug': post.oid,
            'pub_date': when,
            # At most three non-empty keywords and three categories.
            'keywords': [kw for kw in post.keywords if kw][:3],
            'categories': [cat.name for cat in post.categories.all()[:3]]
        })
    return items
def handle(self, *args, **options):
    """Run an ad-hoc query against the redis-backed title search index
    and print the scored matches with timing information."""
    now = utc_now()
    connection = get_redis_connection('titles')
    search_index = RedisSearchIndex(connection)
    # All positional command arguments form the query string.
    query = u' '.join(args)
    print "QUERY:", repr(query)
    t0 = time.time()
    results = search_index.search(query)
    t1 = time.time()
    print "In", t1 - t0, "seconds"
    print "TERMS:", results['terms']
    for id, score, title in results['results']:
        print "\t", id.ljust(4), score, repr(title)
def home(request, oc=None):
    """Homepage view (legacy version): published posts, newest first,
    ten per page, optionally filtered by categories via `oc`."""
    data = {}
    qs = BlogItem.objects.filter(pub_date__lt=utc_now())
    if oc:
        categories = parse_ocs_to_categories(oc)
        cat_q = make_categories_q(categories)
        qs = qs.filter(cat_q)
        data['categories'] = categories

    ## Reasons for not being here
    if request.method == 'HEAD':
        return http.HttpResponse('')
    try:
        redis_increment('homepage:misses', request)
    except Exception:
        # Counting is best-effort; never break the page over redis.
        logging.error('Unable to redis.zincrby', exc_info=True)

    BATCH_SIZE = 10
    try:
        # ?page= is 1-based for users; `page` is 0-based internally.
        page = max(1, int(request.GET.get('page', 1))) - 1
    except ValueError:
        raise http.Http404('invalid page value')
    n, m = page * BATCH_SIZE, (page + 1) * BATCH_SIZE
    max_count = qs.count()
    first_post, = qs.order_by('-pub_date')[:1]
    data['first_post_url'] = request.build_absolute_uri(
        reverse('blog_post', args=[first_post.oid])
    )
    if (page + 1) * BATCH_SIZE < max_count:
        data['next_page'] = page + 2
    data['previous_page'] = page
    if n == 0 and not oc:
        # On the first page and no category filtering.
        # Then, load only the first two posts and tell the template
        # to render the other remaining ones later
        data['rest'] = {'from_index': 2, 'to_index': m}
        m = 2
    else:
        data['rest'] = None
    data['blogitems'] = (
        qs
        .prefetch_related('categories')
        .order_by('-pub_date')
    )[n:m]
    return render(request, 'homepage/home.html', data)
def handle(self, *args, **options):
    """Work through the queue of URLs to analyze, skipping any URL that
    already has a Result from the last 24 hours.

    A short-lived cache key acts as a crude lock so overlapping cron
    runs don't process the queue concurrently.
    """
    if cache.get('nodomains-queued'):
        # Another run appears to be in progress.
        return
    for queued in models.Queued.objects.all().order_by('add_date'):
        # Refresh the lock for every item (expires after 100 seconds).
        cache.set('nodomains-queued', True, 100)
        try:
            then = utc_now() - datetime.timedelta(days=1)
            models.Result.objects.get(
                url=queued.url,
                add_date__gt=then
            )
            print "Skipping", queued.url
        except models.Result.DoesNotExist:
            print queued.url
            run_url(queued.url)
        queued.delete()
    cache.delete('nodomains-queued')
def home(request, oc=None):
    """Homepage: published posts, newest first, ten per page, with an
    optional category filter taken from the `oc` URL part."""
    context = {}
    qs = BlogItem.objects.filter(pub_date__lt=utc_now())
    if oc is not None:
        if not oc:
            # An empty category part is a malformed URL; send the
            # client to the plain homepage instead.
            return redirect('/', permanent=True)
        categories = parse_ocs_to_categories(oc)
        qs = qs.filter(make_categories_q(categories))
        context['categories'] = categories

    # HEAD requests get an empty body and skip the counters.
    if request.method == 'HEAD':
        return http.HttpResponse('')
    try:
        redis_increment('homepage:misses', request)
    except Exception:
        # Counting is best-effort; never break the page over redis.
        logger.error('Unable to redis.zincrby', exc_info=True)

    BATCH_SIZE = 10
    try:
        # ?page= is 1-based for users; page_index is 0-based.
        page_index = max(1, int(request.GET.get('page', 1))) - 1
    except ValueError:
        raise http.Http404('invalid page value')
    start = page_index * BATCH_SIZE
    stop = start + BATCH_SIZE

    total = qs.count()
    first_post, = qs.order_by('-pub_date')[:1]
    context['first_post_url'] = request.build_absolute_uri(
        reverse('blog_post', args=[first_post.oid])
    )
    if stop < total:
        context['next_page'] = page_index + 2
    context['previous_page'] = page_index
    context['blogitems'] = (
        qs
        .prefetch_related('categories')
        .order_by('-pub_date')
    )[start:stop]
    if page_index > 0:
        # Only pages beyond the first get a "Page N" title.
        context['page_title'] = 'Page {}'.format(page_index + 1)
    return render(request, 'homepage/home.html', context)
def test_old_redirects(self):
    """Legacy ?replypath URLs should 301 to the clean post URL with the
    query string stripped."""
    post = BlogItem.objects.create(
        oid='myoid',
        title='TITLEX',
        text="""
        ttest
        test
        """,
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=10),
    )
    url = reverse('blog_post', args=[post.oid])

    response = self.client.get(url)
    assert response.status_code == 200

    response = self.client.get(url, {'replypath': 'foo'})
    self.assertEqual(response.status_code, 301)
    location = urlparse(response['location'])
    self.assertEqual(location.path, url)
    self.assertTrue(not location.query)
def handle(self, *args, **options):
    """Rebuild the redis-backed title search index from scratch with all
    published blog posts, weighted by their recorded hit counts."""
    now = utc_now()
    verbose = int(options['verbosity']) > 1
    connection = get_redis_connection('titles')
    # Start from a clean slate; everything is re-added below.
    connection.flushdb()
    search_index = RedisSearchIndex(connection)
    for plog in models.BlogItem.objects.filter(pub_date__lte=now).order_by('?'):
        if verbose:
            print repr(plog.title),
        # print search_index.add_item(plog.id, plog.title, 1)
        try:
            hits = models.BlogItemHits.objects.get(oid=plog.oid).hits
        except models.BlogItemHits.DoesNotExist:
            # Never been hit; weight of 1 keeps it findable.
            hits = 1
        # Note: the trailing `, hits` makes `result` a tuple of
        # (add_item return value, hits) — printed below when verbose.
        result = search_index.add_item(plog.oid, plog.title, hits), hits
        if verbose:
            print result
def get_data(max_length=1000, pub_date_format=None, offset=0):
    """Serialize recent published posts to plain dicts.

    Category names are bulk-loaded up front (two queries total) instead
    of one query per post. `pub_date_format`, when given, is applied to
    each post's pub_date. The queryset slice is [offset:max_length].
    """
    name_by_id = dict((x.id, x.name) for x in Category.objects.all())
    categories = defaultdict(list)
    for e in BlogItem.categories.through.objects.all():
        categories[e.blogitem_id].append(name_by_id[e.category_id])

    recent = (
        BlogItem.objects
        .filter(pub_date__lt=utc_now())
        .order_by('-pub_date')
    )[offset:max_length]
    items = []
    for post in recent:
        when = post.pub_date
        if pub_date_format:
            when = pub_date_format(when)
        items.append({
            'title': post.title,
            'slug': post.oid,
            'pub_date': when,
            # At most three non-empty keywords and three categories.
            'keywords': [kw for kw in post.keywords if kw][:3],
            'categories': categories[post.id][:3],
        })
    return items
def test_blog_post_with_comment_approval(self):
    """An unapproved comment is hidden from anonymous visitors until a
    logged-in user approves it via the XHR endpoint."""
    post = BlogItem.objects.create(
        oid='some-longish-test-post',
        title='TITLEX',
        text='BLABLABLA',
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=10),
    )
    url = reverse('blog_post', args=[post.oid])
    self._login()
    loggedin = self.client
    anonymous = Client()
    # Sanity-check the two clients really differ in auth state.
    assert len(loggedin.cookies)
    assert not len(anonymous.cookies)

    comment = BlogComment.objects.create(
        oid='a1000',
        blogitem=post,
        comment='COMMENTX',
        name='Mr Anonymous',
    )
    # Not approved yet, so anonymous visitors must not see it.
    response = anonymous.get(url)
    self.assertEqual(response.status_code, 200)
    self.assertTrue('COMMENTX' not in response.content)

    # Approve it as the logged-in user via the XHR endpoint.
    approve_url = reverse('approve_comment', args=[post.oid, comment.oid])
    response = loggedin.post(
        approve_url,
        HTTP_X_REQUESTED_WITH='XMLHttpRequest'
    )
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.content, 'OK')

    # Now the comment is visible to everyone.
    response = anonymous.get(url)
    self.assertEqual(response.status_code, 200)
    self.assertTrue('COMMENTX' in response.content)
def handle(self, *args, **options):
    """Work through the queue of URLs to analyze.

    URLs with a Result from the last 24 hours are skipped; URLs whose
    processing raises are retried at most 5 times (failed_attempts).
    A short-lived cache key acts as a crude lock so overlapping cron
    runs don't process the queue concurrently.
    """
    if cache.get('nodomains-queued'):
        # Another run appears to be in progress.
        return
    queued = models.Queued.objects.filter(failed_attempts__lt=5)
    for queued in queued.order_by('add_date'):
        # Refresh the lock for every item (expires after 100 seconds).
        cache.set('nodomains-queued', True, 100)
        try:
            then = utc_now() - datetime.timedelta(days=1)
            models.Result.objects.get(
                url=queued.url,
                add_date__gt=then
            )
            print "Skipping", queued.url
        except models.Result.DoesNotExist:
            print queued.url
            try:
                run_url(queued.url)
            except Exception:
                # Count the failure and leave the item queued for a
                # later retry (up to the failed_attempts limit).
                queued.failed_attempts += 1
                queued.save()
                continue
        queued.delete()
    cache.delete('nodomains-queued')
def test_homepage_cache_rendering(self):
    """The cached homepage must be invalidated whenever posts, comments
    or categories change, category filtering must only show matching
    posts, and pagination must split posts ten per page."""
    url = reverse('home')
    blog1 = BlogItem.objects.create(
        title='TITLE1',
        text='BLABLABLA',
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=10),
    )
    comment1 = BlogComment.objects.create(
        comment="textext",
        blogitem=blog1,
        approved=True,
    )
    comment2 = BlogComment.objects.create(
        comment="tuxtuxt",
        blogitem=blog1,
        approved=True,
    )
    response = self.client.get(url)
    self.assertTrue('TITLE1' in response.content)
    self.assertTrue('2 comments' in response.content)
    # Editing the post must bust the cached homepage.
    blog1.title = 'TUTLE1'
    blog1.save()
    response = self.client.get(url)
    self.assertTrue('TUTLE1' in response.content)
    # Adding a new post must also bust the cache.
    blog2 = BlogItem.objects.create(
        title='TATLE2',
        text='BLEBLE',
        display_format='structuredtext',
        pub_date=utc_now() - datetime.timedelta(seconds=1),
    )
    response = self.client.get(url)
    self.assertTrue('TATLE2' in response.content)
    self.assertTrue('0 comments' in response.content)
    self.assertTrue('TUTLE1' in response.content)
    self.assertTrue('2 comments' in response.content)

    # by categories only
    cat1 = Category.objects.create(
        name='CATEGORY1',
    )
    cat2 = Category.objects.create(
        name='CATEGORY2',
    )
    blog1.categories.add(cat1)
    blog1.save()
    blog2.categories.add(cat2)
    blog2.save()
    response = self.client.get(url)
    self.assertTrue('CATEGORY1' in response.content)
    self.assertTrue('CATEGORY2' in response.content)
    # Filtering by one category must hide posts from the other.
    url = reverse('only_category', args=['CATEGORY2'])
    response = self.client.get(url)
    self.assertTrue('CATEGORY1' not in response.content)
    self.assertTrue('CATEGORY2' in response.content)
    url = reverse('only_category', args=['CATEGORY1'])
    response = self.client.get(url)
    self.assertTrue('CATEGORY1' in response.content)
    self.assertTrue('CATEGORY2' not in response.content)

    # Add enough posts to spill over onto a second page.
    for i in range(2, 21):
        BlogItem.objects.create(
            title='TITLE-%s' % i,
            text='BLEBLE',
            display_format='structuredtext',
            pub_date=utc_now() - datetime.timedelta(seconds=20 + i),
        )
    url = reverse('home')
    response = self.client.get(url)
    assert '?page=2' in response.content
    # Record which titles page 1 shows so we can assert page 2 shows
    # the complement.
    visible_titles = []
    not_visible_titles = []
    for item in BlogItem.objects.all():
        if item.title in response.content:
            visible_titles.append(item.title)
        else:
            not_visible_titles.append(item.title)
    response = self.client.get(url, {'page': 2})
    for each in visible_titles[:10]:
        assert each not in response.content
    for each in not_visible_titles[:10]:
        assert each in response.content
    assert '?page=1' in response.content
    assert '?page=3' in response.content
def search(request):
    """Full-text search across blog posts and comments.

    Plain words are matched with Postgres full-text search (to_tsquery /
    plainto_tsquery via QuerySet.extra); `keyword:`, `keywords:`,
    `category:` and `categories:` prefixes (split out by split_search)
    are matched against redis sets / Category rows instead.
    """
    data = {}
    search = request.GET.get('q', '')
    if len(search) > 90:
        # Refuse absurdly long queries outright.
        return http.HttpResponse("Search too long")
    documents = []
    data['base_url'] = 'http://%s' % RequestSite(request).domain
    tag_strip = re.compile('<[^>]+>')

    def append_match(item, words):
        # Build one result dict for `item` (a BlogItem or BlogComment)
        # with up to 4 highlighted snippet sentences, and append it to
        # `documents`. Relies on the enclosing `regex`/`regex_ext`.
        text = item.rendered
        text = tag_strip.sub(' ', text)
        sentences = []

        def matcher(match):
            # Wrap each matched term in <b> for highlighting.
            return '<b>%s</b>' % match.group()

        if regex:
            for each in regex.finditer(text):
                # A window of ~35 chars before and ~40 after the match.
                sentence = text[max(each.start() - 35, 0): each.end() + 40]
                sentence = regex_ext.sub(matcher, sentence)
                sentence = sentence.strip()
                # Ellipses when the snippet is cut mid-text.
                if each.start() > 0 and not sentence[0].isupper():
                    sentence = '...%s' % sentence
                if each.end() < len(text):
                    sentence = '%s...' % sentence
                sentences.append(sentence.strip())
                if len(sentences) > 3:
                    break
        if isinstance(item, BlogItem):
            title = html_escape(item.title)
            if regex_ext:
                title = regex_ext.sub(matcher, title)
            date = item.pub_date
            type_ = 'blog'
        else:
            # A comment; make sure it's attached to its parent post.
            if not item.blogitem:
                item.correct_blogitem_parent()
            title = (
                "Comment on <em>%s</em>"
                % html_escape(item.blogitem.title)
            )
            date = item.add_date
            type_ = 'comment'
        documents.append({
            'title': title,
            'summary': '<br>'.join(sentences),
            'date': date,
            'url': item.get_absolute_url(),
            'type': type_,
        })

    def create_search(s):
        # Turn the free-text query into a tsquery expression.
        # Returns (escaped tsquery string, list of plain words for
        # highlighting). A single infix 'or' becomes `a | b`; 'and'
        # tokens are dropped ('&' is the implicit joiner).
        words = re.findall('\w+', s)
        words_orig = words[:]
        if 'or' in words:
            which = words.index('or')
            words_orig.remove('or')
            if (which + 1) < len(words) and which > 0:
                before = words.pop(which - 1)
                words.pop(which - 1)
                after = words.pop(which - 1)
                words.insert(which - 1, '%s | %s' % (before, after))
        while 'and' in words_orig:
            words_orig.remove('and')
        while 'and' in words:
            words.remove('and')
        escaped = ' & '.join(words)
        return escaped, words_orig

    data['q'] = search
    keyword_search = {}
    if len(search) > 1:
        # Pull `keyword:foo` style operators out of the query string.
        _keyword_keys = ('keyword', 'keywords', 'category', 'categories')
        search, keyword_search = split_search(search, _keyword_keys)
    redis = get_redis_connection(reconnection_wrapped=True)
    # Per-model pks already shown, so the second field pass (e.g.
    # 'text' after 'title') doesn't duplicate results.
    not_ids = defaultdict(set)
    times = []
    search_times = []
    count_documents = []
    regex = regex_ext = None

    def append_queryset_search(queryset, order_by, words, model_name):
        # Render the top 20 matches and record their pks.
        # NOTE(review): this uses the closure variable `items`, not the
        # `queryset` parameter — every caller happens to pass `items`,
        # so behavior is unaffected, but the parameter is dead.
        count = items.count()
        count_documents.append(count)
        for item in items.order_by(order_by)[:20]:
            append_match(item, words)
            not_ids[model_name].add(item.pk)
        return count

    now = utc_now()
    if len(search) > 1:
        # Free-text search path (possibly narrowed by keyword operators).
        search_escaped, words = create_search(search)
        # `regex` finds whole words for snippet extraction; `regex_ext`
        # also matches prefixes (word*) for highlighting.
        regex = re.compile(
            r'\b(%s)' % '|'.join(
                re.escape(word)
                for word in words
                if word.lower() not in STOPWORDS
            ),
            re.I | re.U
        )
        regex_ext = re.compile(
            r'\b(%s\w*)\b' % '|'.join(
                re.escape(word)
                for word in words
                if word.lower() not in STOPWORDS
            ),
            re.I | re.U
        )
        for model in (BlogItem, BlogComment):
            qs = model.objects
            model_name = model._meta.object_name
            if model == BlogItem:
                qs = qs.filter(pub_date__lte=now)
                fields = ('title', 'text')
                order_by = '-pub_date'
                if keyword_search.get('keyword'):
                    # use Redis!
                    ids = redis.smembers('kw:%s' % keyword_search['keyword'])
                    if ids:
                        qs = qs.filter(pk__in=ids)
                if keyword_search.get('keywords'):
                    # use Redis!
                    ids = []
                    for each in [x.strip() for x
                                 in keyword_search['keywords'].split(',')
                                 if x.strip()]:
                        ids.extend(redis.smembers('kw:%s' % each))
                    if ids:
                        qs = qs.filter(pk__in=ids)
            elif model == BlogComment:
                fields = ('comment',)
                order_by = '-add_date'
                _specials = ('keyword', 'keywords', 'category', 'categories')
                if any(keyword_search.get(k) for k in _specials):
                    # BlogComments don't have this keyword so it can
                    # never match
                    continue
            for field in fields:
                if not_ids[model_name]:
                    # Don't re-show rows already matched by an earlier
                    # field of the same model.
                    qs = qs.exclude(pk__in=not_ids[model_name])
                # Postgres full-text match on this column. `field` is
                # from the hard-coded tuples above, not user input.
                _sql = "to_tsvector('english'," + field + ") "
                if ' | ' in search_escaped or ' & ' in search_escaped:
                    _sql += "@@ to_tsquery('english', %s)"
                else:
                    _sql += "@@ plainto_tsquery('english', %s)"
                items = qs.extra(where=[_sql], params=[search_escaped])
                t0 = time.time()
                count = append_queryset_search(
                    items, order_by, words, model_name
                )
                t1 = time.time()
                times.append('%s to find %s %ss by field %s' % (
                    t1 - t0,
                    count,
                    model_name,
                    field
                ))
                search_times.append(t1 - t0)
        # NOTE(review): "Searchin" looks like a typo in the log message;
        # left untouched here since it's runtime output.
        logger.info('Searchin for %r:\n%s' % (search, '\n'.join(times)))
    elif keyword_search and any(keyword_search.values()):
        # Operator-only search: no free text, just keyword/category.
        t0 = time.time()
        if keyword_search.get('keyword') or keyword_search.get('keywords'):
            if keyword_search.get('keyword'):
                ids = redis.smembers('kw:%s' % keyword_search['keyword'])
            else:
                ids = []
                for each in [x.strip() for x
                             in keyword_search.get('keywords').split(',')
                             if x.strip()]:
                    ids.extend(redis.smembers('kw:%s' % each))
            if ids:
                items = BlogItem.objects.filter(pk__in=ids)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        if keyword_search.get('category') or keyword_search.get('categories'):
            if keyword_search.get('category'):
                categories = Category.objects.filter(
                    name=keyword_search.get('category')
                )
            else:
                cats = [x.strip() for x
                        in keyword_search.get('categories').split(',')
                        if x.strip()]
                categories = Category.objects.filter(name__in=cats)
            if categories:
                cat_q = make_categories_q(categories)
                items = BlogItem.objects.filter(cat_q)
                model_name = BlogItem._meta.object_name
                append_queryset_search(items, '-pub_date', [], model_name)
        t1 = time.time()
        search_times.append(t1 - t0)
    data['search_time'] = sum(search_times)
    count_documents_shown = len(documents)
    data['documents'] = documents
    data['count_documents'] = sum(count_documents)
    data['count_documents_shown'] = count_documents_shown
    data['better'] = None
    if not data['count_documents']:
        # Nothing found: suggest an OR-joined variant of a short
        # multi-word query.
        _qterms = len(data['q'].split())
        if ' or ' not in data['q'] and _qterms > 1 and _qterms < 5:
            data['better'] = data['q'].replace(' ', ' or ')
    if data['better']:
        data['better_url'] = (
            reverse('search') + '?' +
            urllib.urlencode({'q': data['better'].encode('utf-8')})
        )
    if not data['q']:
        page_title = 'Search'
    elif data['count_documents'] == 1:
        page_title = '1 thing found'
    else:
        page_title = '%s things found' % data['count_documents']
    if count_documents_shown < data['count_documents']:
        if count_documents_shown == 1:
            page_title += ' (but only 1 thing shown)'
        else:
            page_title += ' (but only %s things shown)' % count_documents_shown
    data['page_title'] = page_title
    if (
        not data['count_documents'] and
        len(search.split()) == 1 and
        not keyword_search
    ):
        # Single word, no hits, but it exists as a keyword: redirect
        # to the keyword search instead.
        if redis.smembers('kw:%s' % search):
            url = reverse('search')
            url += '?' + urllib.urlencode({'q': 'keyword:%s' % search})
            return redirect(url)
    return render(request, 'homepage/search.html', data)
def items(self, categories):
    """Return the ten most recent published posts, optionally narrowed
    to the given categories (feed entries)."""
    queryset = BlogItem.objects.filter(pub_date__lt=utc_now())
    if categories:
        queryset = queryset.filter(make_categories_q(categories))
    return queryset.order_by('-pub_date')[:10]