Example #1
0
    def test_volume_pages(self, mockpaginator, mockrepo):
        '''Volume pages view: the volume should be exposed in the template
        context, and ``annotated_pages`` should be empty for an anonymous
        user but populated from ``page_annotation_count`` once logged in.

        :param mockpaginator: mock, presumably injected by a patch
            decorator outside this view of the file; not used directly
        :param mockrepo: mock repository class; configured below so that
            ``get_object`` returns the mock volume
        '''
        mockvol = NonCallableMock(spec=Volume)
        mockvol.pid = 'vol:1'
        mockvol.title = 'Lecoq, the detective'
        mockvol.date = ['1801']
        # second object retrieved from fedora is page, for layout
        mockvol.width = 150
        mockvol.height = 200
        # volume url needed to identify annotations for pages in this volume
        mockvol.get_absolute_url.return_value = reverse(
            'books:volume', kwargs={'pid': mockvol.pid})
        mockrepo.return_value.get_object.return_value = mockvol
        mockvol.find_solr_pages = MagicMock()
        mockvol.find_solr_pages.return_value.count = 3
        mockvol.find_solr_pages.__len__.return_value = 3
        mockpage = Mock(width=640, height=400)
        mockvol.pages = [mockpage]

        vol_page_url = reverse('books:pages', kwargs={'pid': mockvol.pid})
        response = self.client.get(vol_page_url)
        # volume method should be used to find pages
        self.assertIn(call(), mockvol.find_solr_pages.call_args_list)
        # volume should be set in context
        # (a two-argument assert_ would treat the context value as the
        # failure message and always pass, so compare explicitly)
        self.assertEqual(mockvol, response.context['vol'])
        # annotated pages should be empty for anonymous user
        self.assertEqual({}, response.context['annotated_pages'])

        # log in as a regular user
        self.client.login(**self.user_credentials['user'])
        testuser = get_user_model().objects.get(
            username=self.user_credentials['user']['username'])

        # urls for three pages in the volume; the first two are made
        # absolute below and the third is left as a local path
        page_urls = [
            reverse('books:page',
                    kwargs={'vol_pid': mockvol.pid, 'pid': page_pid})
            for page_pid in ('page:1', 'page:2', 'page:3')
        ]
        page1_url, page2_url, page3_url = page_urls
        mockvol.page_annotation_count.return_value = {
            absolutize_url(page1_url): 5,
            absolutize_url(page2_url): 2,
            page3_url: 13
        }
        response = self.client.get(vol_page_url)
        mockvol.page_annotation_count.assert_called_with(testuser)
        annotated_pages = response.context['annotated_pages']
        # counts should be preserved; urls should be non-absolute
        # whether they started that way or not
        # NOTE(review): the lookups for pages 1 and 2 use absolutized
        # keys, which reads at odds with "non-absolute" above - confirm
        # what absolutize_url returns in the test configuration
        self.assertEqual(5, annotated_pages[absolutize_url(page1_url)])
        self.assertEqual(2, annotated_pages[absolutize_url(page2_url)])
        self.assertEqual(13, annotated_pages[page3_url])
Example #2
0
    def test_volume_pages(self, mockpaginator, mockrepo):
        '''Volume pages view: the volume should be exposed in the template
        context, and ``annotated_pages`` should be empty for an anonymous
        user but populated from ``page_annotation_count`` once logged in.

        :param mockpaginator: mock, presumably injected by a patch
            decorator outside this view of the file; not used directly
        :param mockrepo: mock repository class; configured below so that
            ``get_object`` returns the mock volume
        '''
        mockvol = NonCallableMock(spec=Volume)
        mockvol.pid = 'vol:1'
        mockvol.title = 'Lecoq, the detective'
        mockvol.date = ['1801']
        # second object retrieved from fedora is page, for layout
        mockvol.width = 150
        mockvol.height = 200
        # volume url needed to identify annotations for pages in this volume
        mockvol.get_absolute_url.return_value = reverse('books:volume',
            kwargs={'pid': mockvol.pid})
        mockrepo.return_value.get_object.return_value = mockvol
        mockvol.find_solr_pages = MagicMock()
        mockvol.find_solr_pages.return_value.count = 3
        mockvol.find_solr_pages.__len__.return_value = 3
        mockpage = Mock(width=640, height=400)
        mockvol.pages = [mockpage]

        vol_page_url = reverse('books:pages', kwargs={'pid': mockvol.pid})
        response = self.client.get(vol_page_url)
        # volume method should be used to find pages
        self.assertIn(call(), mockvol.find_solr_pages.call_args_list)
        # volume should be set in context
        # (a two-argument assert_ would treat the context value as the
        # failure message and always pass, so compare explicitly)
        self.assertEqual(mockvol, response.context['vol'])
        # annotated pages should be empty for anonymous user
        self.assertEqual({}, response.context['annotated_pages'])

        # log in as a regular user
        self.client.login(**self.user_credentials['user'])
        testuser = get_user_model().objects.get(
            username=self.user_credentials['user']['username'])

        page1_url = reverse('books:page', kwargs={'vol_pid': mockvol.pid, 'pid': 'page:1'})
        page2_url = reverse('books:page', kwargs={'vol_pid': mockvol.pid, 'pid': 'page:2'})
        page3_url = reverse('books:page', kwargs={'vol_pid': mockvol.pid, 'pid': 'page:3'})
        # first two urls are made absolute; the third is left as a local path
        mockvol.page_annotation_count.return_value = {
            absolutize_url(page1_url): 5,
            absolutize_url(page2_url): 2,
            page3_url: 13
        }
        response = self.client.get(vol_page_url)
        mockvol.page_annotation_count.assert_called_with(testuser)
        annotated_pages = response.context['annotated_pages']
        # counts should be preserved; urls should be non-absolute
        # whether they started that way or not
        # NOTE(review): the lookups for pages 1 and 2 use absolutized
        # keys, which reads at odds with "non-absolute" above - confirm
        # what absolutize_url returns in the test configuration
        self.assertEqual(5, annotated_pages[absolutize_url(page1_url)])
        self.assertEqual(2, annotated_pages[absolutize_url(page2_url)])
        self.assertEqual(13, annotated_pages[page3_url])
Example #3
0
 def image_url(self):
     '''Preliminary absolute url for the full-size page image, for use
     in tei facsimile.'''
     # TODO: we probably want to use some version of the ARK here
     # NOTE: the readux page-image url is used on purpose, rather than
     # exposing the IIIF url directly
     url_args = {
         'vol_pid': self.volume.pid,
         'pid': self.pid,
         'mode': 'fullsize',
     }
     image_path = reverse('books:page-image', kwargs=url_args)
     return absolutize_url(image_path)
Example #4
0
    def get_default_pid(self):
        '''Default pid logic for DigitalObjects in :mod:`readux`.  Mint a
        new ARK via the PID manager, add the full ARK uri to the
        Dublin Core identifiers, and use the noid portion of the ARK
        for a Fedora pid in the site-configured Fedora pidspace.

        If no PID manager client is configured, falls back to the
        default pid behavior of the parent class.

        As a side effect, ``self.pid`` is temporarily set to a
        placeholder pid containing ``PID_TOKEN`` so that the object's
        absolute url can be generated as the ARK target.

        :returns: pid string in the form ``pidspace:noid``, or the
            result of the parent class implementation when the PID
            manager is not available
        '''
        # module-level client; may be re-initialized below
        global pidman

        # if pidmanager is not available, fall back to default pid behavior
        if pidman is None:
            return super(DigitalObject, self).get_default_pid()

        # pidman wants a target for the new pid
        # generate a pidman-ready target for a named view

        # Use the object absolute url method
        # NOTE: this requires that all values used in a url be set
        # (i.e., page objects must have volume pid configured)
        self.pid = '%s:%s' % (self.default_pidspace, self.PID_TOKEN)
        target = self.get_absolute_url()

        # reverse() encodes the PID_TOKEN and the :, so just unquote the url
        # (shouldn't contain anything else that needs escaping)
        target = urllib.unquote(target)

        # reverse() returns a full path - absolutize so we get scheme & server also
        target = absolutize_url(target)
        # pid name is not required, but helpful for managing pids
        pid_name = self.label
        # ask pidman for a new ark in the configured pidman domain
        try:
            ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target, name=pid_name)
        except httplib.BadStatusLine:
            # retry exactly once with a fresh client; a second failure
            # is allowed to propagate to the caller
            logger.warning('Error creating ARK; re-initializing pidman client and trying again')
            pidman = DjangoPidmanRestClient()
            ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target, name=pid_name)
        # pidman returns the full, resolvable ark
        # parse into dictionary with nma, naan, and noid
        parsed_ark = parse_ark(ark)
        noid = parsed_ark['noid']  # nice opaque identifier

        # Add full uri ARK to dc:identifier
        self.dc.content.identifier_list.append(ark)

        # use the noid to construct a pid in the configured pidspace
        return '%s:%s' % (self.default_pidspace, noid)
Example #5
0
    def get(self, request, *args, **kwargs):
        '''Proxy a GET request to the url returned by
        :meth:`get_redirect_url` and relay the remote response.

        Conditional request headers are forwarded to the remote server,
        and the remote status code, headers (minus server-specific
        ones) and content are copied into the local response.  When
        ``mode`` is ``info``, the remote JSON is re-serialized with its
        ``@id`` pointing at the local proxy url.

        :param request: current http request
        :param kwargs: url keyword arguments; ``mode`` is required
        :returns: :class:`HttpResponse` mirroring the remote response
        '''
        url = self.get_redirect_url(*args, **kwargs)
        # use headers to allow browsers to cache downloaded copies
        headers = {}
        for meta_key in [
                'HTTP_IF_MODIFIED_SINCE', 'HTTP_IF_UNMODIFIED_SINCE',
                'HTTP_IF_MATCH', 'HTTP_IF_NONE_MATCH'
        ]:
            if meta_key in request.META:
                # META keys are upper-cased with underscores; convert
                # back to the real header name before forwarding
                # (e.g. HTTP_IF_NONE_MATCH -> If-None-Match), since
                # IF_NONE_MATCH is not the header the remote expects
                header_name = meta_key[len('HTTP_'):].replace('_', '-').title()
                headers[header_name] = request.META[meta_key]
        remote_response = requests.get(url, headers=headers)
        local_response = HttpResponse()
        local_response.status_code = remote_response.status_code

        # include response headers, except for server-specific items
        # (compared in lower case, since header names are case-insensitive)
        # FIXME: link header is valuable, but would
        # need to be made relative to current url
        excluded_headers = ('connection', 'server', 'keep-alive', 'link')
        for header, value in remote_response.headers.items():
            if header.lower() not in excluded_headers:
                local_response[header] = value

        # special case, for deep zoom (hack)
        if kwargs['mode'] == 'info':
            data = remote_response.json()
            # need to adjust the id to be relative to current url
            # this is a hack, patching in a proxy iiif interface at this url
            data['@id'] = absolutize_url(
                request.path.replace('/info/', '/iiif'))
            local_response.content = json.dumps(data)
            # update content-length for change in data
            local_response['content-length'] = len(local_response.content)
            # needed to allow external site (i.e. jekyll export)
            # to use deepzoom
            local_response['Access-Control-Allow-Origin'] = '*'
        else:
            # include response content if any
            local_response.content = remote_response.content

        return local_response
Example #6
0
    def get(self, request):
        '''Describe the annotation store API as JSON: name, version,
        and absolute links for each supported operation.'''
        # Absolute API links are included as per annotator 2.0 documentation
        # http://docs.annotatorjs.org/en/latest/modules/storage.html#storage-api
        api_root = absolutize_url(reverse('annotation-api:index'))
        collection_url = "%sannotations" % api_root
        item_url = "%sannotations/:id" % api_root
        search_url = "%ssearch" % api_root

        # operations on annotations, individually or as a collection
        annotation_links = {
            "create": {
                "desc": "Create a new annotation",
                "method": "POST",
                "url": collection_url
            },
            "delete": {
                "desc": "Delete an annotation",
                "method": "DELETE",
                "url": item_url
            },
            "read": {
                "desc": "Get an existing annotation",
                "method": "GET",
                "url": item_url
            },
            "update": {
                "desc": "Update an existing annotation",
                "method": "PUT",
                "url": item_url
            }
        }
        search_link = {
            "desc": "Basic search API",
            "method": "GET",
            "url": search_url
        }

        api_info = {
            "name": "Annotator Store API",
            "version": "2.0.0",
            "links": {
                "annotation": annotation_links,
                "search": search_link
            }
        }
        return JsonResponse(api_info)
Example #7
0
    def get(self, request, *args, **kwargs):
        '''Proxy a GET request to the url returned by
        :meth:`get_redirect_url` and relay the remote response.

        Conditional request headers are forwarded to the remote server,
        and the remote status code, headers (minus server-specific
        ones) and content are copied into the local response.  When
        ``mode`` is ``info``, the remote JSON is re-serialized with its
        ``@id`` pointing at the local proxy url.

        :param request: current http request
        :param kwargs: url keyword arguments; ``mode`` is required
        :returns: :class:`HttpResponse` mirroring the remote response
        '''
        url = self.get_redirect_url(*args, **kwargs)
        # use headers to allow browsers to cache downloaded copies
        headers = {}
        for meta_key in ['HTTP_IF_MODIFIED_SINCE', 'HTTP_IF_UNMODIFIED_SINCE',
                         'HTTP_IF_MATCH', 'HTTP_IF_NONE_MATCH']:
            if meta_key in request.META:
                # META keys are upper-cased with underscores; convert
                # back to the real header name before forwarding
                # (e.g. HTTP_IF_NONE_MATCH -> If-None-Match), since
                # IF_NONE_MATCH is not the header the remote expects
                header_name = meta_key[len('HTTP_'):].replace('_', '-').title()
                headers[header_name] = request.META[meta_key]
        remote_response = requests.get(url, headers=headers)
        local_response = HttpResponse()
        local_response.status_code = remote_response.status_code

        # include response headers, except for server-specific items
        # (compared in lower case, since header names are case-insensitive)
        # FIXME: link header is valuable, but would
        # need to be made relative to current url
        excluded_headers = ('connection', 'server', 'keep-alive', 'link')
        for header, value in remote_response.headers.items():
            if header.lower() not in excluded_headers:
                local_response[header] = value

        # special case, for deep zoom (hack)
        if kwargs['mode'] == 'info':
            data = remote_response.json()
            # need to adjust the id to be relative to current url
            # this is a hack, patching in a proxy iiif interface at this url
            data['@id'] = absolutize_url(request.path.replace('/info/', '/iiif'))
            local_response.content = json.dumps(data)
            # update content-length for change in data
            local_response['content-length'] = len(local_response.content)
            # needed to allow external site (i.e. jekyll export)
            # to use deepzoom
            local_response['Access-Control-Allow-Origin'] = '*'
        else:
            # include response content if any
            local_response.content = remote_response.content

        return local_response
Example #8
0
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets annotation id,
    slugified tags as ana attribute, username as resp attribute, and
    annotation content is converted from markdown to TEI.  Related
    pages and citations on the annotation, if any, are added to the
    note as well.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, tags do not include namespace
    # wrap in a note element and set the default namespace as tei
    teinote = load_xmlobject_from_string(
        '<note xmlns="%s">%s</note>' % (teimap.TEI_NAMESPACE, note_content),
        tei.Note)

    # what id do we want? annotation uuid? url?
    teinote.id = 'annotation-%s' % annotation.id  # can't start with numeric
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in @ana
    # (info() is called once and reused rather than rebuilt per lookup)
    annotation_info = annotation.info()
    if 'tags' in annotation_info and annotation_info['tags']:
        # de-duplicate, then sort so the attribute value is the same
        # from one export to the next
        tag_refs = set('#%s' % slugify(t.strip())
                       for t in annotation_info['tags'])
        teinote.ana = ' '.join(sorted(tag_refs))

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark
            target = teivol.page_id_by_xlink(rel_page)
            # pages not found in the tei are still listed, without a target
            if target is not None:
                page_ref.target = '#%s' % target
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for bibl in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(bibl, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_id = bibl_struct['xml:id'].split('/')[-1]
                bibl_struct['xml:id'] = 'zotero-%s' % zotero_id

            teibibl = load_xmlobject_from_string(bibsoup.biblStruct.prettify(),
                                                 tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote
Example #9
0
 def volume_url(self, obj):
     # absolute url for the volume view of a volume object; the
     # reversed path is unquoted before it is made absolute
     volume_path = reverse('books:volume', kwargs={'pid': obj.pid})
     unquoted_path = urllib.unquote(volume_path)
     return absolutize_url(unquoted_path)
Example #10
0
 def pdf_url(self, obj):
     # absolute version of the pdf url reported by the volume object
     relative_pdf_url = obj.pdf_url()
     return absolutize_url(relative_pdf_url)
Example #11
0
    def generate_volume_tei(self):
        '''Generate TEI for a volume by combining the TEI for
        all pages.

        The serialized TEI is stored in the django cache under a key
        based on the volume pid; a cached copy is reused when present.
        In either case the publication date is set to the current time
        and the document is written back to the cache before returning.

        :returns: :class:`tei.Facsimile` for the volume, or None if
            the volume has no TEI
        '''
        if not self.has_tei:
            return None

        # store volume TEI in django cache, because generating TEI
        # for a large volume is expensive (fedora api calls for each page)
        cache_key = '%s-tei' % self.pid
        vol_tei_xml = cache.get(cache_key, None)
        if vol_tei_xml:
            logger.debug('Loading volume TEI for %s from cache', self.pid)
            vol_tei = xmlmap.load_xmlobject_from_string(vol_tei_xml,
                tei.Facsimile)
        else:
            # tei was not in the cache (or the cached value was empty),
            # so generate it; using else here guarantees vol_tei is
            # always bound before it is used below
            start = time.time()
            vol_tei = tei.Facsimile()
            # populate header information
            vol_tei.create_header()
            vol_tei.header.title = self.title
            # publication statement
            vol_tei.distributor = settings.TEI_DISTRIBUTOR
            vol_tei.pubstmt.distributor_readux = 'Readux'
            vol_tei.pubstmt.desc = 'TEI facsimile generated by Readux version %s' % __version__
            # source description - original publication
            vol_tei.create_original_source()
            vol_tei.original_source.title = self.title
            # original publication date
            if self.date:
                vol_tei.original_source.date = self.date[0]
            # if authors are set, it should be a list
            if self.creator:
                vol_tei.original_source.authors = self.creator
            # source description - digital edition
            vol_tei.create_digital_source()
            vol_tei.digital_source.title = '%s, digital edition' % self.title
            vol_tei.digital_source.date = self.digital_ed_date
            # FIXME: ideally, these would be ARKs, but ARKs for readux volume
            # content do not yet resolve to Readux urls
            vol_tei.digital_source.url = absolutize_url(self.get_absolute_url())
            vol_tei.digital_source.pdf_url = absolutize_url(self.pdf_url())

            # mapping of image types we want in the tei and
            # corresponding mode to pass to the url
            # (full size, page size, thumbnail, and deep zoom variants)
            image_types = {
                'full': 'fs',
                'page': 'single-page',
                'thumbnail': 'thumbnail',
                'small-thumbnail': 'mini-thumbnail',
                'json': 'info',
            }

            # loop through pages and add tei content
            page_order = 1
            for page in self.pages:
                # pages without tei content are skipped and do not
                # advance the page counter
                if not (page.tei.exists and page.tei.content.page):
                    continue

                # include facsimile page *only* from the tei for each page
                # tei facsimile already includes a graphic url
                teipage = page.tei.content.page

                # add a reference from tei page to readux page
                # pages should have ARKS; fall back to readux url if
                # ark is not present (only expected to happen in dev)
                teipage.href = page.ark_uri or absolutize_url(page.get_absolute_url())
                # NOTE: generating ark_uri currently requires loading
                # DC from fedora; could we generate reliably based on the pid?

                teipage.n = page_order
                # NOTE: normally we would use page.page_order, but that
                # requires an additional api call for each page
                # to load the rels-ext, so use a local counter instead

                # ensure graphic elements are present for image variants
                # NOTE: graphic elements need to come immediately after
                # surface and before zone; adding them before removing
                # existing graphic element should place them correctly.
                for image_type, mode in image_types.items():
                    image_url = absolutize_url(reverse(
                        'books:page-image',
                        kwargs={'vol_pid': self.pid, 'pid': page.pid,
                                'mode': mode}))
                    teipage.graphics.append(
                        tei.Graphic(rend=image_type, url=image_url))

                # page tei should have an existing graphic reference
                # remove it from our output
                if teipage.graphics[0].rend is None:
                    del teipage.graphics[0]

                vol_tei.page_list.append(teipage)
                page_order += 1

            logger.info('Volume TEI for %s with %d pages generated in %.02fs',
                        self.pid, len(self.pages), time.time() - start)

        # update current date for either version (new or cached)
        # store current date (tei generation) in publication statement
        export_date = datetime.now()
        vol_tei.pubstmt.date = export_date
        vol_tei.pubstmt.date_normal = export_date

        # save current volume tei in django cache (timeout of 3000)
        cache.set(cache_key, vol_tei.serialize(), 3000)

        return vol_tei
Example #12
0
 def absolute_url(self):
     '''Absolute url to the page view, suitable for handing to external
     services or for referencing in annotations.'''
     local_url = self.get_absolute_url()
     return absolutize_url(local_url)
Example #13
0
 def fulltext_absolute_url(self):
     '''Absolute url to the text view for this volume, for use with
     external services such as voyant-tools.org'''
     text_path = reverse('books:text', kwargs={'pid': self.pid})
     return absolutize_url(text_path)
Example #14
0
 def volume_url(self, obj):
     # absolute url for the volume view of a volume object; the
     # reversed path is unquoted before it is made absolute
     volume_path = reverse('books:volume', kwargs={'pid': obj.pid})
     unquoted_path = urllib.unquote(volume_path)
     return absolutize_url(unquoted_path)
Example #15
0
 def pdf_url(self, obj):
     # absolute version of the pdf url reported by the volume object
     relative_pdf_url = obj.pdf_url()
     return absolutize_url(relative_pdf_url)
Example #16
0
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets annotation id,
    slugified tags as ana attribute, username as resp attribute, and
    annotation content is converted from markdown to TEI.  Related
    pages and citations on the annotation, if any, are added to the
    note as well.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, tags do not include namespace
    # wrap in a note element and set the default namespace as tei
    teinote = load_xmlobject_from_string(
        '<note xmlns="%s">%s</note>' % (teimap.TEI_NAMESPACE, note_content),
        tei.Note)

    # what id do we want? annotation uuid? url?
    teinote.id = 'annotation-%s' % annotation.id  # can't start with numeric
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in @ana
    # (info() is called once and reused rather than rebuilt per lookup)
    annotation_info = annotation.info()
    if 'tags' in annotation_info and annotation_info['tags']:
        # de-duplicate, then sort so the attribute value is the same
        # from one export to the next
        tag_refs = set('#%s' % slugify(t.strip())
                       for t in annotation_info['tags'])
        teinote.ana = ' '.join(sorted(tag_refs))

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark
            target = teivol.page_id_by_xlink(rel_page)
            # pages not found in the tei are still listed, without a target
            if target is not None:
                page_ref.target = '#%s' % target
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for bibl in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(bibl, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_id = bibl_struct['xml:id'].split('/')[-1]
                bibl_struct['xml:id'] = 'zotero-%s' % zotero_id

            teibibl = load_xmlobject_from_string(bibsoup.biblStruct.prettify(),
                                                 tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote