Example #1
def issue_pages_json(request, lccn, date, edition):
    title, issue, page = _get_tip(lccn, date, edition)
    host = request.get_host()
    if issue:
        return HttpResponse(issue.json(host=host), content_type='application/json')
    else:
        return HttpResponseNotFound()
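The views in these examples all resolve their objects through a shared helper, _get_tip, which the snippets never define. The sketch below is a minimal reconstruction, assuming Django models named Title, Issue, and Page with the reverse relations (title.issues, issue.pages) that the views imply; the real helper's lookup logic may differ.

from django.shortcuts import get_object_or_404

from chronam.core import models  # assumed module path


def _get_tip(lccn, date, edition, sequence=None):
    # Hypothetical reconstruction: fetch the title by LCCN (404 if the
    # LCCN is unknown), then the issue and, when a sequence is given,
    # the page. Returning None for a missing issue or page matches the
    # "if issue:" / "if page:" checks in the views.
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = title.issues.filter(date_issued=date, edition=edition).first()
    page = None
    if issue is not None and sequence is not None:
        page = issue.pages.filter(sequence=int(sequence)).first()
    return title, issue, page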
Example #2
def page_ocr_txt(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    try:
        text = page.ocr.text
        return HttpResponse(text, content_type='text/plain')
    except models.OCR.DoesNotExist:
        raise Http404("No OCR for %s" % page)
Example #3
def issue_pages_rdf(request, lccn, date, edition):
    title, issue, page = _get_tip(lccn, date, edition)
    graph = issue_to_graph(issue)
    response = HttpResponse(graph.serialize(base=_rdf_base(request),
                                            include_base=True),
                            content_type='application/rdf+xml')
    return add_cache_tag(response, "lccn=%s" % lccn)
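Several variants attach a cache tag to the response before returning it. add_cache_tag is not shown either; here is a minimal sketch, assuming the tag is surfaced as a response header that an upstream cache or CDN can key on (the X-Cache-Tag header name is an assumption, not necessarily the project's actual choice).

def add_cache_tag(response, tag):
    # Append the tag (e.g. "lccn=sn83045377") to an assumed X-Cache-Tag
    # header so a caching proxy can purge responses by tag.
    if response.get('X-Cache-Tag'):
        response['X-Cache-Tag'] += ',' + tag
    else:
        response['X-Cache-Tag'] = tag
    return response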
Example #4
def issue_pages_rdf(request, lccn, date, edition):
    title, issue, page = _get_tip(lccn, date, edition)
    graph = issue_to_graph(issue)
    response = HttpResponse(graph.serialize(base=_rdf_base(request),
                                            include_base=True),
                            content_type='application/rdf+xml')
    return response
Example #5
def page_json(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    host = request.get_host()
    if page:
        return HttpResponse(page.json(host=host), content_type='application/json')
    else:
        return HttpResponseNotFound()
Example #6
def page_ocr(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    return render_to_response('page_text.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Example #7
def chronam_topic(request, topic_id):
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)
    if urlresolvers.reverse('recommended_topics') in request.META.get('HTTP_REFERER', ''):
        crumbs.extend([{'label': 'Recommended Topics',
                        'href': urlresolvers.reverse('recommended_topics')},
                       {'label': topic.name,
                        'href': urlresolvers.reverse('chronam_topic', 
                                              kwargs={'topic_id': topic.pk})}])
    else:
        referer = re.sub(r'^https?://', '', request.META.get('HTTP_REFERER', '')).split('/')
        try:
            lccn, date, edition, sequence = referer[2], referer[3], referer[4][-1], referer[5][-1]
            page = get_page(lccn, date, edition, sequence)
            if page: 
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([{'label': topic.name,
                                'href': urlresolvers.reverse('chronam_topic',
                                              kwargs={'topic_id': topic.pk})}])
        except Exception:
            pass
    important_dates = filter(lambda s: not s.isspace(), topic.important_dates.split('\n '))
    search_suggestions = topic.suggested_search_terms.split('\t')
    # strip the title prefix from the description when present
    # (str.lstrip removes a character set, not a prefix)
    chronam_pages = [{'title': t.title,
                      'description': (t.description[len(t.title):].lstrip()
                                      if t.description.startswith(t.title)
                                      else t.description),
                      'url': t.url} for t in topic.topicpages_set.all()]
    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Example #8
def page_ocr_xml(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if page.ocr_abs_filename:
        response = sendfile(request, page.ocr_abs_filename)
        return add_cache_tag(response, "lccn=%s" % lccn)
    else:
        raise Http404("No OCR for page %s" % page)
Example #9
def issue_pages_json(request, lccn, date, edition):
    title, issue, page = _get_tip(lccn, date, edition)
    if issue:
        return HttpResponse(issue.json(request),
                            content_type="application/json")
    else:
        return HttpResponseNotFound()
Example #10
def page_ocr_txt(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    try:
        text = get_page_text(page)
    except models.OCR.DoesNotExist:
        raise Http404("No OCR for %s" % page)

    response = HttpResponse(text, content_type='text/plain')
    return add_cache_tag(response, "lccn=%s" % lccn)
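get_page_text is another undefined helper; the tests later in this section index into its result (text[0]), so it evidently returns a list of text blocks. A hedged sketch, assuming the OCR text is fetched from a Solr index via pysolr and stored in an ocr_eng field (the field name, the page.url attribute, and settings.SOLR are all assumptions):

import pysolr

from django.conf import settings


def get_page_text(page):
    # Hypothetical sketch: look the page up in Solr by its canonical URL
    # and return its OCR text as a one-element list of text blocks.
    solr = pysolr.Solr(settings.SOLR)  # assumed setting holding the Solr URL
    results = solr.search('id:"%s"' % page.url, fl='ocr_eng')
    for doc in results:
        return [doc.get('ocr_eng', '')]
    raise models.OCR.DoesNotExist("no OCR text indexed for %s" % page)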
Example #11
def page_ocr_txt(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not issue.batch.api_available:
        return HttpResponse(status=403)
    else:
        try:
            text = page.ocr.text
            return HttpResponse(text, content_type='text/plain')
        except models.OCR.DoesNotExist:
            raise Http404("No OCR for %s" % page)
Example #12
    def test_getting_text_from_solr_slovenia(self):
        """
        Tests get_page_text() with batch batch_iune_oriole_ver01:
        first builds a page object via _get_tip(), then passes it
        to get_page_text().
        """
        batch_dir = os.path.join(settings.BATCH_STORAGE, 'batch_iune_oriole_ver01')
        self.assertTrue(os.path.isdir(batch_dir))
        loader = BatchLoader(process_ocr=True)
        batch = loader.load_batch(batch_dir)
        self.assertEqual(batch.name, 'batch_iune_oriole_ver01')
        title, issue, page = _get_tip('sn83045377', '1906-03-01', 1, 1)
        text = get_page_text(page)
        self.assertIn("Od Mizo in dale", text[0])
        self.assertIn("To je preecj inoettii tobak! Marsi", text[0])

        # purge the batch and make sure it's gone from the db
        loader.purge_batch('batch_iune_oriole_ver01')
        self.assertEqual(Batch.objects.all().count(), 0)
        self.assertEqual(Title.objects.get(lccn='sn83045377').has_issues, False)
Example #13
def page(request, lccn, date, edition, sequence, words=None):
    fragments = []
    if words:
        fragments.append("words=" + words)
    qs = request.META.get('QUERY_STRING')
    if qs:
        fragments.append(qs)
    if fragments:
        path_parts = dict(lccn=lccn, date=date, edition=edition,
                          sequence=sequence)
        url = urlresolvers.reverse('chronam_page',
                                   kwargs=path_parts)

        return HttpResponseRedirect(url + "#" + "&".join(fragments))

    title, issue, page = _get_tip(lccn, date, edition, sequence)

    if not page.jp2_filename:
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""

    # if no word highlights were requested, see if the user came
    # from search engine results and attempt to highlight words from their
    # query by redirecting to a url that has the highlighted words in it
    if not words:
        try:
            words = _search_engine_words(request)
            words = '+'.join(words)
            if len(words) > 0:
                path_parts = dict(lccn=lccn, date=date, edition=edition,
                                  sequence=sequence, words=words)
                url = urlresolvers.reverse('chronam_page_words',
                                           kwargs=path_parts)
                return HttpResponseRedirect(url)
        except Exception:
            if settings.DEBUG:
                raise
Example #14
    def test_getting_text_from_solr_utah(self):
        """
        Tests get_page_text() with batch batch_uuml_thys_ver01:
        first builds a page object via _get_tip(), then passes it
        to get_page_text().
        """
        batch_dir = os.path.join(settings.BATCH_STORAGE, 'batch_uuml_thys_ver01')
        self.assertTrue(os.path.isdir(batch_dir))
        loader = BatchLoader(process_ocr=True)
        batch = loader.load_batch(batch_dir)
        self.assertEqual(batch.name, 'batch_uuml_thys_ver01')
        title, issue, page = _get_tip('sn83045396', '1911-09-17', 1, 1)
        text = get_page_text(page)
        self.assertIn("Uc nice at tlio slate fair track", text[0])
        self.assertIn("PAGES FIVE CENTS", text[0])
        self.assertIn('gBter ho had left the grounds that', text[0])

        # purge the batch and make sure it's gone from the db
        loader.purge_batch('batch_uuml_thys_ver01')
        self.assertEqual(Batch.objects.all().count(), 0)
        self.assertEqual(Title.objects.get(lccn='sn83045396').has_issues, False)
Example #15
def page(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)

    if not page.jp2_filename:
        notes = page.notes.filter(type="noteAboutReproduction")
        num_notes = notes.count()
        if num_notes >= 1:
            explanation = notes[0].text
        else:
            explanation = ""

    # see if the user came from search engine results and attempt to
    # highlight words from their query by redirecting to a url that
    # has the highlighted words in it
    try:
        words = _search_engine_words(request)
        words = '+'.join(words)
        if len(words) > 0:
            path_parts = dict(lccn=lccn,
                              date=date,
                              edition=edition,
                              sequence=sequence)
            url = '%s?%s#%s' % (urlresolvers.reverse('chronam_page_words',
                                                     kwargs=path_parts),
                                request.GET.urlencode(), words)
            response = HttpResponseRedirect(url)
            return add_cache_tag(response, "lccn=%s" % lccn)
    except Exception as exc:
        LOGGER.error(
            "Failed to add search highlighting based on the referred search engine query: %s",
            exc,
            exc_info=True)
        if settings.DEBUG:
            raise
        # else squish the exception so the page will still get
        # served up minus the highlights

    # Calculate the previous_issue_first_page. Note: it was decided
    # that we want to skip over issues with missing pages. See ticket
    # #383.
    _issue = issue
    while True:
        previous_issue_first_page = None
        _issue = _issue.previous
        if not _issue:
            break
        previous_issue_first_page = _issue.first_page
        if previous_issue_first_page:
            break

    # do the same as above but for next_issue this time.
    _issue = issue
    while True:
        next_issue_first_page = None
        _issue = _issue.next
        if not _issue:
            break
        next_issue_first_page = _issue.first_page
        if next_issue_first_page:
            break

    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    page_head_heading = "%s, %s, %s" % (title.display_name, label(issue),
                                        label(page))
    page_head_subheading = label(title)
    crumbs = create_crumbs(title, issue, date, edition, page)

    filename = page.jp2_abs_filename
    if filename:
        try:
            image_size = filesizeformat(os.path.getsize(filename))
        except OSError:
            image_size = "Unknown"

    image_credit = issue.batch.awardee.name
    host = request.get_host()
    profile_uri = 'http://www.openarchives.org/ore/html/'

    template = "page.html"
    text = get_page_text(page)
    response = render_to_response(template,
                                  dictionary=locals(),
                                  context_instance=RequestContext(request))
    return add_cache_tag(response, "lccn=%s" % lccn)
Example #16
def page_ocr_xml(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    return _stream_file(page.ocr_abs_filename, 'application/xml')
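The jp2/pdf/xml views delegate to a _stream_file helper, also not shown. A minimal sketch, assuming it 404s on an empty or missing path and otherwise streams the file from disk with Django's FileResponse (the real helper may instead hand the file to the web server, as the sendfile() calls elsewhere suggest):

import os

from django.http import FileResponse, Http404


def _stream_file(abs_filename, content_type):
    # Hypothetical sketch: 404 for absent files, otherwise stream from disk.
    if not abs_filename or not os.path.exists(abs_filename):
        raise Http404("file not found")
    return FileResponse(open(abs_filename, 'rb'), content_type=content_type)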
Example #17
def page_jp2(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    return _stream_file(page.jp2_abs_filename, 'image/jp2')
Example #18
def page_pdf(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    return _stream_file(page.pdf_abs_filename, 'application/pdf')
Example #19
def page_ocr_xml(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not issue.batch.api_available:
        return HttpResponse(status=403)
    else:
        return _stream_file(page.ocr_abs_filename, 'application/xml')
Example #20
def page_jp2(request, lccn, date, edition, sequence):
    title, issue, page = _get_tip(lccn, date, edition, sequence)
    if not issue.batch.api_available:
        return HttpResponse(status=403)
    else:
        return _stream_file(page.jp2_abs_filename, 'image/jp2')