def authors(report=False, dryrun=False, force=False):
    """Bring the authors stored in Elasticsearch in line with MediaWiki.

    Collects author titles and article modification data from MediaWiki
    (via ``Proxy``) and the authors currently in Elasticsearch, works out
    which authors to add/update and which to delete, logs the counts, and
    then applies the changes.

    Args:
        report: If true, return right after logging the counts; nothing is
            deleted or saved.
        dryrun: If true, still fetch each author (from Elasticsearch for
            deletions, from MediaWiki for additions) and build the
            ``Author`` object, but skip the actual ``delete()``/``save()``.
        force: If true, skip the comparison: every author currently in
            Elasticsearch is re-added and none are deleted.

    Returns:
        None.
    """
    # The return value is not needed in this function.
    set_hosts_index()

    logprint('debug', '------------------------------------------------------------------------')
    logprint('debug', 'getting mw_authors...')
    mw_author_titles = Proxy().authors(cached_ok=False)
    mw_articles = Proxy().articles_lastmod()
    logprint('debug', 'getting es_authors...')
    es_authors = Author.authors()

    if force:
        logprint('debug', 'forcibly update all authors')
        authors_new = [page['title'] for page in es_authors]
        authors_delete = []
    else:
        logprint('debug', 'determining new,delete...')
        authors_new, authors_delete = Elasticsearch.authors_to_update(
            mw_author_titles, mw_articles, es_authors)

    logprint('debug', 'mediawiki authors: %s' % len(mw_author_titles))
    logprint('debug', 'authors to add: %s' % len(authors_new))
    logprint('debug', 'authors to delete: %s' % len(authors_delete))
    if report:
        return

    logprint('debug', 'deleting...')
    for n, title in enumerate(authors_delete):
        logprint('debug', '--------------------')
        logprint('debug', '%s/%s %s' % (n, len(authors_delete), title))
        # The lookup happens even on a dry run; only the delete is skipped.
        author = Author.get(title)
        if not dryrun:
            author.delete()

    logprint('debug', 'adding...')
    for n, title in enumerate(authors_new):
        logprint('debug', '--------------------')
        logprint('debug', '%s/%s %s' % (n, len(authors_new), title))
        logprint('debug', 'getting from mediawiki')
        mwauthor = Proxy().page(title)
        # Only a missing document means "create from scratch".  Catching
        # NotFoundError specifically (as the post-save check below does)
        # lets real failures and Ctrl-C surface instead of being mistaken
        # for a missing author.
        try:
            existing_author = Author.get(title)
        except NotFoundError:
            existing_author = None
        else:
            logprint('debug', 'exists in elasticsearch')
        logprint('debug', 'creating author')
        author = Author.from_mw(mwauthor, author=existing_author)
        if not dryrun:
            logprint('debug', 'saving')
            author.save()
            # Read the author back to confirm the save actually landed.
            try:
                Author.get(title)
            except NotFoundError:
                logprint('error', 'ERROR: Author(%s) NOT SAVED!' % title)

    logprint('debug', 'DONE')
def create_index():
    """Create the docstore index and set up its document types.

    In order: calls ``set_hosts_index()``, creates the index named by
    ``settings.DOCSTORE_INDEX``, calls ``init()`` on ``Author``, ``Page``
    and ``Source``, then registers each of them with the new index.
    Progress is reported through ``logprint`` at debug level.
    """
    # The returned value is not used; a fresh Index object is built below.
    set_hosts_index()

    logprint('debug', 'creating new index')
    new_index = Index(settings.DOCSTORE_INDEX)
    new_index.create()

    logprint('debug', 'creating mappings')
    doc_types = (Author, Page, Source)
    for doc_type in doc_types:
        doc_type.init()

    logprint('debug', 'registering doc types')
    for doc_type in doc_types:
        new_index.doc_type(doc_type)

    logprint('debug', 'DONE')
def author(request, url_title, template_name="wikiprox/author.html"):
    """Render the HTML page for a single author.

    Args:
        request: The incoming request, used to build the template context.
        url_title: Identifier passed to ``Author.get``.
        template_name: Template to render.

    Raises:
        Http404: If the lookup (or the scrub step) raises ``NotFoundError``.
    """
    try:
        author_doc = Author.get(url_title)
        author_doc.scrub()
    except NotFoundError:
        raise Http404

    context = {"author": author_doc}
    return render_to_response(
        template_name,
        context,
        context_instance=RequestContext(request),
    )
def author(request, url_title, format=None):
    """Return the API representation of a single author.

    The payload contains the author's own fields (``url_title``, ``title``,
    ``title_sort``, ``body``, ``modified``), links to the API and HTML
    views of the author, and an ``articles`` list giving the title and API
    URL of each item returned by ``author.articles()``.

    Responds with HTTP 404 when ``Author.get`` raises ``NotFoundError``.
    """
    def _link(route, slug):
        # All URLs in the payload are built the same way.
        return reverse(route, args=([slug]), request=request)

    try:
        record = Author.get(url_title)
    except NotFoundError:
        return Response(status=status.HTTP_404_NOT_FOUND)

    articles = []
    for article in record.articles():
        articles.append({
            'title': article.title,
            'url': _link('wikiprox-api-page', article.url_title),
        })

    return Response({
        'url_title': record.url_title,
        'url': _link('wikiprox-api-author', record.url_title),
        'absolute_url': _link('wikiprox-author', record.url_title),
        'title': record.title,
        'title_sort': record.title_sort,
        'body': record.body,
        'modified': record.modified,
        'articles': articles,
    })
def article(request, url_title="index", printed=False, template_name="wikiprox/page.html"):
    """Render a wiki page, or redirect to the matching author page.

    The page is looked up by ``url_title`` and, failing that, by the same
    title with underscores replaced by spaces.  If neither exists but one
    of the two forms matches an author title, the client is redirected to
    the ``wikiprox-author`` view.

    Args:
        request: The incoming request.
        url_title: Title of the page to display.
        printed: If true (and the page may be shown), use the print template.
        template_name: Kept in the signature for callers, but always
            replaced by one of the three templates chosen below.
            NOTE(review): confirm whether honouring it was ever intended.

    Raises:
        Http404: If no page and no author matches the title.
    """
    alt_title = url_title.replace("_", " ")

    # Try the title as given, then the space-separated variant.  When both
    # are identical a second lookup could only repeat the first result, so
    # it is skipped.
    candidates = [url_title]
    if alt_title != url_title:
        candidates.append(alt_title)

    page = None
    for title in candidates:
        try:
            found = Page.get(title)
            found.scrub()
        except NotFoundError:
            continue
        if found:
            page = found
            break

    if not page:
        # might be an author
        author_titles = [author.title for author in Author.authors()]
        for title in candidates:
            if title in author_titles:
                return HttpResponseRedirect(reverse("wikiprox-author", args=[title]))
        raise Http404

    if (not page.published) and (not settings.MEDIAWIKI_SHOW_UNPUBLISHED):
        template_name = "wikiprox/unpublished.html"
    elif printed:
        template_name = "wikiprox/article-print.html"
    else:
        template_name = "wikiprox/article.html"

    # DDR objects
    # show small number of objects, distributed among topics
    TOTAL_OBJECTS = 10
    PAGE_OBJECTS = 8
    # A failure to reach the DDR service must not break the page: render
    # without objects and pass the kind of error to the template.
    try:
        terms_objects = page.ddr_terms_objects(size=TOTAL_OBJECTS)
        ddr_error = None
    except requests.exceptions.ConnectionError:
        terms_objects = []
        ddr_error = "ConnectionError"
    except requests.exceptions.Timeout:
        terms_objects = []
        ddr_error = "Timeout"
    ddr_objects = ddr.distribute_list(terms_objects, PAGE_OBJECTS)
    ddr_objects_width = 280
    # Floor division keeps the width an integer (70) on Python 2 and 3
    # alike; true division would hand the template 70.0 on Python 3.
    ddr_img_width = ddr_objects_width // (PAGE_OBJECTS // 2)

    return render_to_response(
        template_name,
        {
            "page": page,
            "ddr_error": ddr_error,
            "ddr_objects": ddr_objects,
            "ddr_objects_width": ddr_objects_width,
            "ddr_img_width": ddr_img_width,
        },
        context_instance=RequestContext(request),
    )
def authors(request, format=None):
    """Return the API listing of all authors.

    Each entry carries the author's ``title``, ``title_sort`` and the URL
    of the ``wikiprox-api-author`` view for that author.
    """
    listing = []
    for author in Author.authors():
        entry = {
            'title': author.title,
            'title_sort': author.title_sort,
            'url': reverse('wikiprox-api-author', args=([author.url_title]), request=request),
        }
        listing.append(entry)
    return Response(listing)
def authors(request, template_name="wikiprox/authors.html"):
    """Render the HTML author index.

    The template receives ``authors``: the result of
    ``Author.authors(num_columns=4)``.
    """
    context = {"authors": Author.authors(num_columns=4)}
    return render_to_response(
        template_name,
        context,
        context_instance=RequestContext(request),
    )