def page_ocr_txt(request, lccn, date, edition, sequence):
    """Serve the OCR text of a single page as ``text/plain``.

    The page is looked up with ``_get_tip``.  When the page has no related
    OCR record (``models.OCR.DoesNotExist``), an ``Http404`` is raised.
    """
    _title, _issue, page = _get_tip(lccn, date, edition, sequence)
    try:
        ocr_text = page.ocr.text
    except models.OCR.DoesNotExist:
        raise Http404("No OCR for %s" % page)
    return HttpResponse(ocr_text, content_type='text/plain')
def _strip_title_prefix(description, title):
    """Return ``description`` without a leading copy of ``title``.

    ``str.lstrip(title)`` treats its argument as a *set of characters* and
    would keep eating leading characters of the description as long as they
    occur anywhere in the title; this removes the exact prefix only.
    """
    if title and description.startswith(title):
        return description[len(title):]
    return description


def openoni_topic(request, topic_id):
    """Render the detail page for a single topic.

    Breadcrumbs are derived from the ``Referer`` header on a best-effort
    basis:

    * if the referer contains the "recommended topics" URL, the crumbs are
      the base crumbs plus "Recommended Topics" and the topic itself;
    * otherwise the referer path is parsed as a page URL
      (``<host>/<x>/<lccn>/<date>/<edition>/<sequence>``) and, if that page
      exists, the page crumbs plus the topic are used;
    * if neither applies (including a missing referer) only the base crumbs
      are shown.

    Raises ``Http404`` (via ``get_object_or_404``) if the topic is unknown.

    NOTE: every local variable is handed to the template through
    ``locals()``, so local names here are part of the template contract.
    """
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)

    # The Referer header is optional (direct visits, privacy settings);
    # fall back to '' so the checks below cannot fail on None.
    http_referer = request.META.get('HTTP_REFERER') or ''

    if urlresolvers.reverse('recommended_topics') in http_referer:
        crumbs.extend([
            {'label': 'Recommended Topics',
             'href': urlresolvers.reverse('recommended_topics')},
            {'label': topic.name,
             'href': urlresolvers.reverse('openoni_topic',
                                          kwargs={'topic_id': topic.pk})},
        ])
    else:
        referer = re.sub(r'^https?://', '', http_referer).split('/')
        try:
            # Only the last character of the edition and sequence path
            # segments is used.
            lccn, date, edition, sequence = (
                referer[2], referer[3], referer[4][-1], referer[5][-1])
            page = get_page(lccn, date, edition, sequence)
            if page:
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([
                    {'label': topic.name,
                     'href': urlresolvers.reverse(
                         'openoni_topic', kwargs={'topic_id': topic.pk})},
                ])
        except Exception:
            # Best effort only: a referer that is not a page URL (too few
            # path segments, unknown page, ...) simply leaves the crumbs
            # computed so far.  ``Exception`` rather than a bare ``except``
            # so SystemExit/KeyboardInterrupt still propagate.
            pass

    # Entries consisting solely of whitespace are dropped; empty strings
    # are kept, exactly as ``not s.isspace()`` dictates.
    important_dates = [s for s in topic.important_dates.split('\n ')
                       if not s.isspace()]
    search_suggestions = topic.suggested_search_terms.split('\t')
    openoni_pages = [{'title': t.title,
                      'description': _strip_title_prefix(t.description,
                                                         t.title),
                      'url': t.url}
                     for t in topic.topicpages_set.all()]
    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
def page_ocr(request, lccn, date, edition, sequence):
    """Render the ``page_text.html`` template for a single page.

    The template context is ``locals()``, so every local name below
    (``title``, ``issue``, ``page``, ``page_title``, ``crumbs``, ``host``
    and the view arguments) is available to the template and must keep
    its name.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    # Title, issue and page labels, comma separated.
    page_title = "%s, %s, %s" % tuple(
        label(part) for part in (title, issue, page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    return render_to_response(
        'page_text.html',
        dictionary=locals(),
        context_instance=RequestContext(request),
    )
def issue_pages_rdf(request, lccn, date, edition):
    """Return the RDF/XML serialization of an issue.

    The issue is resolved with ``_get_tip`` (no sequence given), turned into
    a graph with ``issue_to_graph`` and serialized using ``_rdf_base(request)``
    as the base, served as ``application/rdf+xml``.
    """
    _title, issue, _page = _get_tip(lccn, date, edition)
    rdf_xml = issue_to_graph(issue).serialize(
        base=_rdf_base(request), include_base=True)
    return HttpResponse(rdf_xml, content_type='application/rdf+xml')
def page_json(request, lccn, date, edition, sequence):
    """Return the JSON representation of a page.

    Responds with ``application/json`` built by ``page.json(host=...)``, or
    with ``HttpResponseNotFound`` when ``_get_tip`` yields a falsy page.
    """
    _title, _issue, page = _get_tip(lccn, date, edition, sequence)
    # The host is resolved before the page check, so any error raised by
    # get_host() surfaces regardless of whether the page exists.
    host = request.get_host()
    if not page:
        return HttpResponseNotFound()
    return HttpResponse(page.json(host=host),
                        content_type='application/json')
def page(request, lccn, date, edition, sequence, words=None):
    """Handle a request for a single page, redirecting where appropriate.

    Steps visible in this block:

    1. If ``words`` or a query string is present, redirect to the
       ``openoni_page`` URL with that data moved into the URL fragment
       (after ``#``).
    2. Otherwise load the title/issue/page via ``_get_tip``.
    3. If the page has no ``jp2_filename``, set ``explanation`` to the text
       of its first "noteAboutReproduction" note, or ``""`` if it has none.
    4. Ask ``_search_engine_words`` for words and, if any are returned,
       redirect to the ``openoni_page_words`` URL.

    NOTE(review): the visible body returns no response after step 4 and
    ``title``, ``issue``, ``notes`` and ``explanation`` are never read, so
    the remainder of this view appears to be missing here -- confirm
    against the complete file.
    """
    fragments = []
    if words:
        fragments.append("words=" + words)
    qs = request.META.get('QUERY_STRING')
    if qs:
        fragments.append(qs)
    if fragments:
        # Move highlight words / query string into the fragment of the
        # canonical page URL.
        path_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence)
        url = urlresolvers.reverse('openoni_page', kwargs=path_parts)
        return HttpResponseRedirect(url + "#" + "&".join(fragments))
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not page.jp2_filename:
        # No JP2 file name recorded for the page: pick up the reproduction
        # note, if there is one.
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""
    # If no word highlights were requested, see if the user came
    # from search engine results and attempt to highlight words from their
    # query by redirecting to a URL that has the highlighted words in it.
    # (A truthy ``words`` has already caused a redirect above, so this
    # condition always holds when execution reaches it.)
    if not words:
        try:
            words = _search_engine_words(request)
            words = '+'.join(words)
            if len(words) > 0:
                path_parts = dict(lccn=lccn, date=date, edition=edition, sequence=sequence, words=words)
                url = urlresolvers.reverse('openoni_page_words', kwargs=path_parts)
                return HttpResponseRedirect(url)
        except Exception, e:
            # Failures are only re-raised when settings.DEBUG is on;
            # otherwise they are swallowed and execution continues.
            if settings.DEBUG:
                raise e
def page_ocr_xml(request, lccn, date, edition, sequence):
    """Stream the page's OCR file (``page.ocr_abs_filename``) as ``application/xml``."""
    _title, _issue, page = _get_tip(lccn, date, edition, sequence)
    ocr_path = page.ocr_abs_filename
    return _stream_file(ocr_path, 'application/xml')
def page_jp2(request, lccn, date, edition, sequence):
    """Stream the page's JP2 file (``page.jp2_abs_filename``) as ``image/jp2``."""
    _title, _issue, page = _get_tip(lccn, date, edition, sequence)
    jp2_path = page.jp2_abs_filename
    return _stream_file(jp2_path, 'image/jp2')
def page_pdf(request, lccn, date, edition, sequence):
    """Stream the page's PDF file (``page.pdf_abs_filename``) as ``application/pdf``."""
    _title, _issue, page = _get_tip(lccn, date, edition, sequence)
    pdf_path = page.pdf_abs_filename
    return _stream_file(pdf_path, 'application/pdf')