def issue_pages_json(request, lccn, date, edition):
    """Return the JSON representation of an issue, or 404 if it doesn't exist."""
    title, issue, page = _get_tip(lccn, date, edition)
    host = request.get_host()
    if issue:
        # `mimetype` was removed from HttpResponse in Django 1.7; use
        # `content_type`, matching the other JSON views in this file.
        return HttpResponse(issue.json(host=host), content_type='application/json')
    else:
        return HttpResponseNotFound()
def page_ocr_txt(request, lccn, date, edition, sequence):
    """Serve the plain-text OCR for a page, or 404 if no OCR was loaded."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    try:
        text = page.ocr.text
        # `mimetype` was removed in Django 1.7; `content_type` is the
        # supported keyword used by the sibling views in this file.
        return HttpResponse(text, content_type='text/plain')
    except models.OCR.DoesNotExist:
        # `except X, e:` is Python-2-only syntax; the bound exception
        # object was never used, so it is dropped here.
        raise Http404("No OCR for %s" % page)
def issue_pages_rdf(request, lccn, date, edition):
    """Serve the RDF/XML graph describing an issue and its pages."""
    title, issue, page = _get_tip(lccn, date, edition)
    rdf_xml = issue_to_graph(issue).serialize(base=_rdf_base(request),
                                              include_base=True)
    return add_cache_tag(
        HttpResponse(rdf_xml, content_type='application/rdf+xml'),
        "lccn=%s" % lccn)
def issue_pages_rdf(request, lccn, date, edition):
    """Serve the RDF/XML graph describing an issue and its pages."""
    title, issue, page = _get_tip(lccn, date, edition)
    graph = issue_to_graph(issue)
    # `mimetype` was removed from HttpResponse in Django 1.7;
    # `content_type` matches the sibling RDF view in this file.
    response = HttpResponse(graph.serialize(base=_rdf_base(request),
                                            include_base=True),
                            content_type='application/rdf+xml')
    return response
def page_json(request, lccn, date, edition, sequence):
    """Return the JSON representation of a page, or 404 if it doesn't exist."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    host = request.get_host()
    if page:
        # `mimetype` was removed from HttpResponse in Django 1.7; use
        # `content_type`, matching the other JSON views in this file.
        return HttpResponse(page.json(host=host), content_type='application/json')
    else:
        return HttpResponseNotFound()
def page_ocr(request, lccn, date, edition, sequence):
    """Render the HTML OCR-text view for a single newspaper page.

    NOTE: the template context is built from locals(), so the local
    variable names below (title, issue, page, page_title, crumbs, host)
    are part of the template contract — do not rename them.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    # human-readable heading, e.g. "<title>, <issue date>, <page label>"
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    return render_to_response('page_text.html', dictionary=locals(),
                              context_instance=RequestContext(request))
def page_json(request, lccn, date, edition, sequence):
    """Return the JSON representation of a page, or 404 if it doesn't exist."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    host = request.get_host()
    if not page:
        return HttpResponseNotFound()
    return HttpResponse(page.json(host=host), content_type='application/json')
def chronam_topic(request, topic_id):
    """Render the detail page for a Topic, building breadcrumbs from the
    referring page when possible.

    NOTE: the template context is built from locals(), so the local
    variable names here are part of the template contract — do not rename
    them.
    """
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)
    # HTTP_REFERER may be absent from the request; default to '' so the
    # `in` test and the re.sub below don't raise TypeError on None.
    referer_url = request.META.get('HTTP_REFERER', '') or ''
    if urlresolvers.reverse('recommended_topics') in referer_url:
        crumbs.extend([
            {'label': 'Recommended Topics',
             'href': urlresolvers.reverse('recommended_topics')},
            {'label': topic.name,
             'href': urlresolvers.reverse('chronam_topic',
                                          kwargs={'topic_id': topic.pk})},
        ])
    else:
        # Try to pull lccn/date/edition/sequence out of the referring URL
        # so the crumb trail can point back at the page the user came from.
        referer = re.sub(r'^https?://', '', referer_url).split('/')
        try:
            lccn, date, edition, sequence = (referer[2], referer[3],
                                             referer[4][-1], referer[5][-1])
            page = get_page(lccn, date, edition, sequence)
            if page:
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([
                    {'label': topic.name,
                     'href': urlresolvers.reverse(
                         'chronam_topic', kwargs={'topic_id': topic.pk})},
                ])
        except Exception:
            # best effort only: any parse/lookup failure falls back to the
            # default crumbs (narrowed from a bare except, which would also
            # swallow KeyboardInterrupt/SystemExit)
            pass
    important_dates = filter(lambda s: not s.isspace(),
                             topic.important_dates.split('\n '))
    search_suggestions = topic.suggested_search_terms.split('\t')
    # NOTE(review): lstrip(t.title) strips any leading characters drawn from
    # the title's character set, not the title prefix itself — presumably a
    # prefix strip was intended; left as-is to preserve current output.
    chronam_pages = [{'title': t.title,
                      'description': t.description.lstrip(t.title),
                      'url': t.url}
                     for t in topic.topicpages_set.all()]
    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
def page_ocr_xml(request, lccn, date, edition, sequence):
    """Stream the OCR XML file for a page; 404 when no OCR file exists."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not page.ocr_abs_filename:
        raise Http404("No ocr for page %s" % page)
    return add_cache_tag(sendfile(request, page.ocr_abs_filename),
                         "lccn=%s" % lccn)
def issue_pages_json(request, lccn, date, edition):
    """Return the JSON representation of an issue, or 404 when missing."""
    title, issue, page = _get_tip(lccn, date, edition)
    if not issue:
        return HttpResponseNotFound()
    return HttpResponse(issue.json(request), content_type="application/json")
def page_ocr_txt(request, lccn, date, edition, sequence):
    """Serve the plain-text OCR for a page; 404 when no OCR was loaded."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    try:
        ocr_text = get_page_text(page)
    except models.OCR.DoesNotExist:
        raise Http404("No OCR for %s" % page)
    return add_cache_tag(HttpResponse(ocr_text, content_type='text/plain'),
                         "lccn=%s" % lccn)
def page_ocr_txt(request, lccn, date, edition, sequence):
    """Serve plain-text OCR, unless the batch is withheld from the API."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not issue.batch.api_available:
        # the batch owner has opted this content out of API access
        return HttpResponse(status=403)
    try:
        return HttpResponse(page.ocr.text, content_type='text/plain')
    except models.OCR.DoesNotExist:
        raise Http404("No OCR for %s" % page)
def test_getting_text_from_solr_slovenia(self):
    """Load batch_iune_oriole_ver01, check get_page_text() output for a
    known page, then purge the batch and verify it is gone from the db.
    """
    batch_dir = os.path.join(settings.BATCH_STORAGE, 'batch_iune_oriole_ver01')
    self.assertTrue(os.path.isdir(batch_dir))
    loader = BatchLoader(process_ocr=True)
    batch = loader.load_batch(batch_dir)
    self.assertEqual(batch.name, 'batch_iune_oriole_ver01')
    title, issue, page = _get_tip('sn83045377', '1906-03-01', 1, 1)
    text = get_page_text(page)
    # spot-check two OCR snippets known to appear on this page
    for snippet in ("Od Mizo in dale",
                    "To je preecj inoettii tobak! Marsi"):
        self.assertIn(snippet, text[0])
    # purge the batch and make sure it's gone from the db
    loader.purge_batch('batch_iune_oriole_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83045377').has_issues, False)
def page(request, lccn, date, edition, sequence, words=None):
    """Display a newspaper page, redirecting so that any words to highlight
    end up in the URL fragment.

    NOTE: this view builds template context from local variables, so the
    local names here are kept exactly as in the original.
    """
    fragments = []
    if words:
        fragments.append("words=" + words)
    qs = request.META.get('QUERY_STRING')
    if qs:
        fragments.append(qs)
    if fragments:
        # move highlight words / query string into the fragment and redirect
        path_parts = dict(lccn=lccn, date=date, edition=edition,
                          sequence=sequence)
        url = urlresolvers.reverse('chronam_page', kwargs=path_parts)
        return HttpResponseRedirect(url + "#" + "&".join(fragments))
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not page.jp2_filename:
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""
    # if no word highlights were requested, see if the user came
    # from search engine results and attempt to highlight words from their
    # query by redirecting to a url that has the highlighted words in it
    if not words:
        try:
            words = _search_engine_words(request)
            words = '+'.join(words)
            if len(words) > 0:
                path_parts = dict(lccn=lccn, date=date, edition=edition,
                                  sequence=sequence, words=words)
                url = urlresolvers.reverse('chronam_page_words',
                                           kwargs=path_parts)
                return HttpResponseRedirect(url)
        except Exception as e:
            # `except Exception, e` is Python-2-only syntax; `as` is valid
            # on both Python 2.6+ and 3.x. Squash the error outside DEBUG so
            # the page still renders without highlights.
            if settings.DEBUG:
                raise e
def test_getting_text_from_solr_utah(self):
    """Load batch_uuml_thys_ver01, check get_page_text() output for a
    known page, then purge the batch and verify it is gone from the db.
    """
    batch_dir = os.path.join(settings.BATCH_STORAGE, 'batch_uuml_thys_ver01')
    self.assertTrue(os.path.isdir(batch_dir))
    loader = BatchLoader(process_ocr=True)
    batch = loader.load_batch(batch_dir)
    self.assertEqual(batch.name, 'batch_uuml_thys_ver01')
    title, issue, page = _get_tip('sn83045396', '1911-09-17', 1, 1)
    text = get_page_text(page)
    # spot-check three OCR snippets known to appear on this page
    for snippet in ("Uc nice at tlio slate fair track",
                    "PAGES FIVE CENTS",
                    'gBter ho had left the grounds that'):
        self.assertIn(snippet, text[0])
    # purge the batch and make sure it's gone from the db
    loader.purge_batch('batch_uuml_thys_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83045396').has_issues, False)
def test_getting_text_from_solr_slovenia(self):
    """Exercise get_page_text() against batch_iune_oriole_ver01 and then
    purge the batch, asserting the database is left empty.
    """
    batch_dir = os.path.join(settings.BATCH_STORAGE, 'batch_iune_oriole_ver01')
    self.assertTrue(os.path.isdir(batch_dir))
    loader = BatchLoader(process_ocr=True)
    batch = loader.load_batch(batch_dir)
    self.assertEqual(batch.name, 'batch_iune_oriole_ver01')
    title, issue, page = _get_tip('sn83045377', '1906-03-01', 1, 1)
    text = get_page_text(page)
    page_text = text[0]
    self.assertIn("Od Mizo in dale", page_text)
    self.assertIn("To je preecj inoettii tobak! Marsi", page_text)
    # purge the batch and make sure it's gone from the db
    loader.purge_batch('batch_iune_oriole_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83045377').has_issues, False)
def test_getting_text_from_solr_utah(self):
    """Exercise get_page_text() against batch_uuml_thys_ver01 and then
    purge the batch, asserting the database is left empty.
    """
    batch_dir = os.path.join(settings.BATCH_STORAGE, 'batch_uuml_thys_ver01')
    self.assertTrue(os.path.isdir(batch_dir))
    loader = BatchLoader(process_ocr=True)
    batch = loader.load_batch(batch_dir)
    self.assertEqual(batch.name, 'batch_uuml_thys_ver01')
    title, issue, page = _get_tip('sn83045396', '1911-09-17', 1, 1)
    text = get_page_text(page)
    page_text = text[0]
    self.assertIn("Uc nice at tlio slate fair track", page_text)
    self.assertIn("PAGES FIVE CENTS", page_text)
    self.assertIn('gBter ho had left the grounds that', page_text)
    # purge the batch and make sure it's gone from the db
    loader.purge_batch('batch_uuml_thys_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83045396').has_issues, False)
def page(request, lccn, date, edition, sequence):
    """Render the main viewer for one newspaper page.

    Gathers everything the page template needs: highlight redirect for
    search-engine referrals, previous/next issue navigation, headings,
    crumbs, image metadata and OCR text.

    NOTE: the template context is built from locals() at the bottom, so
    every local variable name in this function is part of the template
    contract — do not rename them.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not page.jp2_filename:
        # no page image: surface any "reproduction" note as an explanation
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""
    # see if the user came from search engine results and attempt to
    # highlight words from their query by redirecting to a url that
    # has the highlighted words in it
    try:
        words = _search_engine_words(request)
        words = '+'.join(words)
        if len(words) > 0:
            path_parts = dict(lccn=lccn, date=date, edition=edition,
                              sequence=sequence)
            # keep the original query string and put the words in the fragment
            url = '%s?%s#%s' % (urlresolvers.reverse('chronam_page_words',
                                                     kwargs=path_parts),
                                request.GET.urlencode(), words)
            response = HttpResponseRedirect(url)
            return add_cache_tag(response, "lccn=%s" % lccn)
    except Exception as exc:
        LOGGER.error(
            "Failed to add search highlighting based on the referred search engine query: %s",
            exc, exc_info=True)
        if settings.DEBUG:
            raise
        # else squish the exception so the page will still get
        # served up minus the highlights
    # Calculate the previous_issue_first_page. Note: it was decided
    # that we want to skip over issues with missing pages. See ticket
    # #383.
    _issue = issue
    while True:
        previous_issue_first_page = None
        _issue = _issue.previous
        if not _issue:
            break
        previous_issue_first_page = _issue.first_page
        if previous_issue_first_page:
            break
    # do the same as above but for next_issue this time.
    _issue = issue
    while True:
        next_issue_first_page = None
        _issue = _issue.next
        if not _issue:
            break
        next_issue_first_page = _issue.first_page
        if next_issue_first_page:
            break
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    page_head_heading = "%s, %s, %s" % (title.display_name, label(issue),
                                        label(page))
    page_head_subheading = label(title)
    crumbs = create_crumbs(title, issue, date, edition, page)
    filename = page.jp2_abs_filename
    if filename:
        try:
            # human-readable image size for display in the template
            im = os.path.getsize(filename)
            image_size = filesizeformat(im)
        except OSError:
            image_size = "Unknown"
    image_credit = issue.batch.awardee.name
    host = request.get_host()
    profile_uri = 'http://www.openarchives.org/ore/html/'
    template = "page.html"
    text = get_page_text(page)
    response = render_to_response(template, dictionary=locals(),
                                  context_instance=RequestContext(request))
    return add_cache_tag(response, "lccn=%s" % lccn)
def page_ocr_xml(request, lccn, date, edition, sequence):
    """Stream the OCR XML file for a page."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    ocr_path = page.ocr_abs_filename
    return _stream_file(ocr_path, 'application/xml')
def page_jp2(request, lccn, date, edition, sequence):
    """Stream the JPEG 2000 image file for a page."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    jp2_path = page.jp2_abs_filename
    return _stream_file(jp2_path, 'image/jp2')
def page_pdf(request, lccn, date, edition, sequence):
    """Stream the PDF file for a page."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    pdf_path = page.pdf_abs_filename
    return _stream_file(pdf_path, 'application/pdf')
def page(request, lccn, date, edition, sequence):
    """Render the main viewer for one newspaper page.

    Gathers everything the page template needs: highlight redirect for
    search-engine referrals, previous/next issue navigation, headings,
    crumbs, image metadata and OCR text.

    NOTE: the template context is built from locals() at the bottom, so
    every local variable name in this function is part of the template
    contract — do not rename them.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not page.jp2_filename:
        # no page image: surface any "reproduction" note as an explanation
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""
    # see if the user came from search engine results and attempt to
    # highlight words from their query by redirecting to a url that
    # has the highlighted words in it
    try:
        words = _search_engine_words(request)
        words = '+'.join(words)
        if len(words) > 0:
            path_parts = dict(lccn=lccn, date=date, edition=edition,
                              sequence=sequence)
            # keep the original query string and put the words in the fragment
            url = '%s?%s#%s' % (urlresolvers.reverse('chronam_page_words',
                                                     kwargs=path_parts),
                                request.GET.urlencode(), words)
            response = HttpResponseRedirect(url)
            return add_cache_tag(response, "lccn=%s" % lccn)
    except Exception as exc:
        LOGGER.error(
            "Failed to add search highlighting based on the referred search engine query: %s",
            exc, exc_info=True)
        if settings.DEBUG:
            raise
        # else squish the exception so the page will still get
        # served up minus the highlights
    # Calculate the previous_issue_first_page. Note: it was decided
    # that we want to skip over issues with missing pages. See ticket
    # #383.
    _issue = issue
    while True:
        previous_issue_first_page = None
        _issue = _issue.previous
        if not _issue:
            break
        previous_issue_first_page = _issue.first_page
        if previous_issue_first_page:
            break
    # do the same as above but for next_issue this time.
    _issue = issue
    while True:
        next_issue_first_page = None
        _issue = _issue.next
        if not _issue:
            break
        next_issue_first_page = _issue.first_page
        if next_issue_first_page:
            break
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    page_head_heading = "%s, %s, %s" % (title.display_name, label(issue),
                                        label(page))
    page_head_subheading = label(title)
    crumbs = create_crumbs(title, issue, date, edition, page)
    filename = page.jp2_abs_filename
    if filename:
        try:
            # human-readable image size for display in the template
            im = os.path.getsize(filename)
            image_size = filesizeformat(im)
        except OSError:
            image_size = "Unknown"
    image_credit = issue.batch.awardee.name
    host = request.get_host()
    profile_uri = 'http://www.openarchives.org/ore/html/'
    template = "page.html"
    text = get_page_text(page)
    response = render_to_response(template, dictionary=locals(),
                                  context_instance=RequestContext(request))
    return add_cache_tag(response, "lccn=%s" % lccn)
def page_ocr_xml(request, lccn, date, edition, sequence):
    """Stream OCR XML, unless the batch is withheld from the API."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if issue.batch.api_available:
        return _stream_file(page.ocr_abs_filename, 'application/xml')
    # the batch owner has opted this content out of API access
    return HttpResponse(status=403)
def page_jp2(request, lccn, date, edition, sequence):
    """Stream the JPEG 2000 image, unless the batch is withheld from the API."""
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if issue.batch.api_available:
        return _stream_file(page.jp2_abs_filename, 'image/jp2')
    # the batch owner has opted this content out of API access
    return HttpResponse(status=403)