def page_ocr_txt(request, lccn, date, edition, sequence):
    """Serve the raw OCR text of a single newspaper page as text/plain.

    Raises Http404 when no OCR record exists for the page.
    """
    _title, _issue, page = _get_tip(lccn, date, edition, sequence)
    try:
        ocr_text = get_page_text(page)
    except models.OCR.DoesNotExist:
        raise Http404("No OCR for %s" % page)
    resp = HttpResponse(ocr_text, content_type='text/plain')
    return add_cache_tag(resp, "lccn=%s" % lccn)
def page_ocr(request, lccn, date, edition, sequence):
    """Render the OCR-text HTML view ('page_text.html') for one page.

    NOTE: the template context is built from locals(), so every local
    variable name below (title, issue, page, page_title, crumbs, host,
    text) is part of the template contract -- do not rename them.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    text = get_page_text(page)
    response = render_to_response('page_text.html', dictionary=locals(),
                                  context_instance=RequestContext(request))
    # Tag the response with the title's LCCN (used by add_cache_tag;
    # presumably for cache invalidation per title -- see that helper).
    return add_cache_tag(response, "lccn=%s" % lccn)
def test_getting_text_from_solr_slovenia(self):
    """Exercise get_page_text() against batch_iune_oriole_ver01.

    Loads the batch, resolves a page object via _get_tip(), checks that
    known OCR snippets appear in the extracted text, then purges the
    batch and confirms it is gone from the db.
    """
    batch_name = 'batch_iune_oriole_ver01'
    batch_dir = os.path.join(settings.BATCH_STORAGE, batch_name)
    self.assertTrue(os.path.isdir(batch_dir))

    loader = BatchLoader(process_ocr=True)
    loaded = loader.load_batch(batch_dir)
    self.assertEqual(loaded.name, batch_name)

    _title, _issue, page = _get_tip('sn83045377', '1906-03-01', 1, 1)
    ocr = get_page_text(page)
    self.assertIn("Od Mizo in dale", ocr[0])
    self.assertIn("To je preecj inoettii tobak! Marsi", ocr[0])

    # purge the batch and make sure it's gone from the db
    loader.purge_batch(batch_name)
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83045377').has_issues, False)
def test_getting_text_from_solr_utah(self):
    """Exercise get_page_text() against batch_uuml_thys_ver01.

    Loads the batch, resolves a page object via _get_tip(), checks that
    known OCR snippets appear in the extracted text, then purges the
    batch and confirms it is gone from the db.
    """
    batch_name = 'batch_uuml_thys_ver01'
    batch_dir = os.path.join(settings.BATCH_STORAGE, batch_name)
    self.assertTrue(os.path.isdir(batch_dir))

    loader = BatchLoader(process_ocr=True)
    loaded = loader.load_batch(batch_dir)
    self.assertEqual(loaded.name, batch_name)

    _title, _issue, page = _get_tip('sn83045396', '1911-09-17', 1, 1)
    ocr = get_page_text(page)
    self.assertIn("Uc nice at tlio slate fair track", ocr[0])
    self.assertIn("PAGES FIVE CENTS", ocr[0])
    self.assertIn('gBter ho had left the grounds that', ocr[0])

    # purge the batch and make sure it's gone from the db
    loader.purge_batch(batch_name)
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83045396').has_issues, False)
def test_getting_text_from_solr_slovenia(self):
    """Check get_page_text() output for batch batch_iune_oriole_ver01.

    Builds a page object with _get_tip() and asserts that expected OCR
    fragments are present, then purges the batch again.
    """
    storage_path = os.path.join(settings.BATCH_STORAGE,
                                'batch_iune_oriole_ver01')
    self.assertTrue(os.path.isdir(storage_path))

    batch_loader = BatchLoader(process_ocr=True)
    batch = batch_loader.load_batch(storage_path)
    self.assertEqual(batch.name, 'batch_iune_oriole_ver01')

    title, issue, page = _get_tip('sn83045377', '1906-03-01', 1, 1)
    page_text = get_page_text(page)
    self.assertIn("Od Mizo in dale", page_text[0])
    self.assertIn("To je preecj inoettii tobak! Marsi", page_text[0])

    # purge the batch and make sure it's gone from the db
    batch_loader.purge_batch('batch_iune_oriole_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(
        Title.objects.get(lccn='sn83045377').has_issues, False)
def test_getting_text_from_solr_utah(self):
    """Check get_page_text() output for batch batch_uuml_thys_ver01.

    Builds a page object with _get_tip() and asserts that expected OCR
    fragments are present, then purges the batch again.
    """
    storage_path = os.path.join(settings.BATCH_STORAGE,
                                'batch_uuml_thys_ver01')
    self.assertTrue(os.path.isdir(storage_path))

    batch_loader = BatchLoader(process_ocr=True)
    batch = batch_loader.load_batch(storage_path)
    self.assertEqual(batch.name, 'batch_uuml_thys_ver01')

    title, issue, page = _get_tip('sn83045396', '1911-09-17', 1, 1)
    page_text = get_page_text(page)
    self.assertIn("Uc nice at tlio slate fair track", page_text[0])
    self.assertIn("PAGES FIVE CENTS", page_text[0])
    self.assertIn('gBter ho had left the grounds that', page_text[0])

    # purge the batch and make sure it's gone from the db
    batch_loader.purge_batch('batch_uuml_thys_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(
        Title.objects.get(lccn='sn83045396').has_issues, False)
def page(request, lccn, date, edition, sequence):
    """Render the main page view ("page.html") for one newspaper page.

    Resolves the title/issue/page, optionally redirects search-engine
    visitors to the word-highlight URL, computes previous/next issue
    navigation links, and renders the template.

    NOTE: the template context is built from locals(), so every local
    variable name in this function is part of the template contract --
    do not rename them.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)

    # When the page has no image, expose any "noteAboutReproduction"
    # note text to the template as `explanation` (via locals()).
    if not page.jp2_filename:
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""

    # see if the user came from search engine results and attempt to
    # highlight words from their query by redirecting to a url that
    # has the highlighted words in it
    try:
        words = _search_engine_words(request)
        words = '+'.join(words)
        if len(words) > 0:
            path_parts = dict(lccn=lccn, date=date, edition=edition,
                              sequence=sequence)
            # Redirect to the words view, carrying the query string and
            # placing the joined words in the URL fragment.
            url = '%s?%s#%s' % (
                urlresolvers.reverse('chronam_page_words',
                                     kwargs=path_parts),
                request.GET.urlencode(), words)
            response = HttpResponseRedirect(url)
            return add_cache_tag(response, "lccn=%s" % lccn)
    except Exception as exc:
        LOGGER.error(
            "Failed to add search highlighting based on the referred search engine query: %s",
            exc, exc_info=True)
        if settings.DEBUG:
            raise
        # else squish the exception so the page will still get
        # served up minus the highlights

    # Calculate the previous_issue_first_page. Note: it was decided
    # that we want to skip over issues with missing pages. See ticket
    # #383.
    _issue = issue
    while True:
        previous_issue_first_page = None
        _issue = _issue.previous
        if not _issue:
            break
        previous_issue_first_page = _issue.first_page
        if previous_issue_first_page:
            break

    # do the same as above but for next_issue this time.
    _issue = issue
    while True:
        next_issue_first_page = None
        _issue = _issue.next
        if not _issue:
            break
        next_issue_first_page = _issue.first_page
        if next_issue_first_page:
            break

    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    page_head_heading = "%s, %s, %s" % (title.display_name, label(issue),
                                        label(page))
    page_head_subheading = label(title)
    crumbs = create_crumbs(title, issue, date, edition, page)

    filename = page.jp2_abs_filename
    if filename:
        try:
            # os.path.getsize returns a byte count; filesizeformat
            # renders it human-readable for the template.
            im = os.path.getsize(filename)
            image_size = filesizeformat(im)
        except OSError:
            image_size = "Unknown"

    image_credit = issue.batch.awardee.name
    host = request.get_host()
    profile_uri = 'http://www.openarchives.org/ore/html/'
    template = "page.html"
    text = get_page_text(page)
    response = render_to_response(template, dictionary=locals(),
                                  context_instance=RequestContext(request))
    return add_cache_tag(response, "lccn=%s" % lccn)
def page(request, lccn, date, edition, sequence):
    """Render "page.html" for a single newspaper page.

    Looks up the title/issue/page triple, may redirect visitors coming
    from a search engine to a URL that highlights their query words,
    finds the first page of the previous and next issues for
    navigation, and finally renders the page template.

    NOTE: render_to_response receives dictionary=locals(), so each
    local variable defined here is visible to the template under its
    own name -- renaming locals changes the template context.
    """
    title, issue, page = _get_tip(lccn, date, edition, sequence)

    # No page image: pull a "noteAboutReproduction" note (if any) so
    # the template can show an explanation instead.
    if not page.jp2_filename:
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""

    # see if the user came from search engine results and attempt to
    # highlight words from their query by redirecting to a url that
    # has the highlighted words in it
    try:
        words = _search_engine_words(request)
        words = '+'.join(words)
        if len(words) > 0:
            path_parts = dict(lccn=lccn, date=date, edition=edition,
                              sequence=sequence)
            url = '%s?%s#%s' % (
                urlresolvers.reverse('chronam_page_words',
                                     kwargs=path_parts),
                request.GET.urlencode(), words)
            response = HttpResponseRedirect(url)
            return add_cache_tag(response, "lccn=%s" % lccn)
    except Exception as exc:
        LOGGER.error(
            "Failed to add search highlighting based on the referred search engine query: %s",
            exc, exc_info=True)
        if settings.DEBUG:
            raise
        # else squish the exception so the page will still get
        # served up minus the highlights

    # Calculate the previous_issue_first_page. Note: it was decided
    # that we want to skip over issues with missing pages. See ticket
    # #383.
    _issue = issue
    while True:
        previous_issue_first_page = None
        _issue = _issue.previous
        if not _issue:
            break
        previous_issue_first_page = _issue.first_page
        if previous_issue_first_page:
            break

    # do the same as above but for next_issue this time.
    _issue = issue
    while True:
        next_issue_first_page = None
        _issue = _issue.next
        if not _issue:
            break
        next_issue_first_page = _issue.first_page
        if next_issue_first_page:
            break

    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    page_head_heading = "%s, %s, %s" % (title.display_name, label(issue),
                                        label(page))
    page_head_subheading = label(title)
    crumbs = create_crumbs(title, issue, date, edition, page)

    filename = page.jp2_abs_filename
    if filename:
        try:
            # Byte size of the JP2 on disk, formatted for display.
            im = os.path.getsize(filename)
            image_size = filesizeformat(im)
        except OSError:
            image_size = "Unknown"

    image_credit = issue.batch.awardee.name
    host = request.get_host()
    profile_uri = 'http://www.openarchives.org/ore/html/'
    template = "page.html"
    text = get_page_text(page)
    response = render_to_response(template, dictionary=locals(),
                                  context_instance=RequestContext(request))
    return add_cache_tag(response, "lccn=%s" % lccn)