def test_volume_pages(self, mockpaginator, mockrepo):
    '''Test the volume pages view as an anonymous and as a logged in user.

    Checks that the volume ``find_solr_pages`` method is called, that the
    volume is set in the response context, that ``annotated_pages`` is an
    empty dict for an anonymous user, and that for a logged in user the
    counts returned by ``page_annotation_count`` are available in the
    response context.
    '''
    mockvol = NonCallableMock(spec=Volume)
    mockvol.pid = 'vol:1'
    mockvol.title = 'Lecoq, the detective'
    mockvol.date = ['1801']
    # second object retrieved from fedora is page, for layout
    mockvol.width = 150
    mockvol.height = 200
    # volume url needed to identify annotations for pages in this volume
    mockvol.get_absolute_url.return_value = reverse(
        'books:volume', kwargs={'pid': mockvol.pid})
    mockrepo.return_value.get_object.return_value = mockvol
    mockvol.find_solr_pages = MagicMock()
    mockvol.find_solr_pages.return_value.count = 3
    mockvol.find_solr_pages.__len__.return_value = 3
    mockpage = Mock(width=640, height=400)
    mockvol.pages = [mockpage]

    vol_page_url = reverse('books:pages', kwargs={'pid': mockvol.pid})
    response = self.client.get(vol_page_url)
    # volume method should be used to find pages
    self.assertIn(call(), mockvol.find_solr_pages.call_args_list)
    # volume should be set in context
    # (compare explicitly; a two-argument assert_ would only check that
    # the mock is truthy and use the context value as failure message)
    self.assertEqual(mockvol, response.context['vol'])
    # annotated pages should be empty for anonymous user
    self.assertEqual({}, response.context['annotated_pages'])

    # log in as a regular user
    self.client.login(**self.user_credentials['user'])
    testuser = get_user_model().objects.get(
        username=self.user_credentials['user']['username'])

    page1_url = reverse('books:page',
                        kwargs={'vol_pid': mockvol.pid, 'pid': 'page:1'})
    page2_url = reverse('books:page',
                        kwargs={'vol_pid': mockvol.pid, 'pid': 'page:2'})
    page3_url = reverse('books:page',
                        kwargs={'vol_pid': mockvol.pid, 'pid': 'page:3'})

    # mix of absolutized and as-reversed urls as keys
    mockvol.page_annotation_count.return_value = {
        absolutize_url(page1_url): 5,
        absolutize_url(page2_url): 2,
        page3_url: 13
    }
    response = self.client.get(vol_page_url)
    mockvol.page_annotation_count.assert_called_with(testuser)
    annotated_pages = response.context['annotated_pages']
    # counts should be preserved, and are looked up here under the same
    # keys that page_annotation_count returned
    # NOTE(review): an earlier comment said urls should be non-absolute
    # in the context, but the lookups below use absolutized keys - confirm
    self.assertEqual(5, annotated_pages[absolutize_url(page1_url)])
    self.assertEqual(2, annotated_pages[absolutize_url(page2_url)])
    self.assertEqual(13, annotated_pages[page3_url])
def test_volume_pages(self, mockpaginator, mockrepo):
    '''Test the volume pages view as an anonymous and as a logged in user.

    Checks that the volume ``find_solr_pages`` method is called, that the
    volume is set in the response context, that ``annotated_pages`` is an
    empty dict for an anonymous user, and that for a logged in user the
    counts returned by ``page_annotation_count`` are available in the
    response context.
    '''
    mockvol = NonCallableMock(spec=Volume)
    mockvol.pid = 'vol:1'
    mockvol.title = 'Lecoq, the detective'
    mockvol.date = ['1801']
    # second object retrieved from fedora is page, for layout
    mockvol.width = 150
    mockvol.height = 200
    # volume url needed to identify annotations for pages in this volume
    mockvol.get_absolute_url.return_value = reverse(
        'books:volume', kwargs={'pid': mockvol.pid})
    mockrepo.return_value.get_object.return_value = mockvol
    mockvol.find_solr_pages = MagicMock()
    mockvol.find_solr_pages.return_value.count = 3
    mockvol.find_solr_pages.__len__.return_value = 3
    mockpage = Mock(width=640, height=400)
    mockvol.pages = [mockpage]

    vol_page_url = reverse('books:pages', kwargs={'pid': mockvol.pid})
    response = self.client.get(vol_page_url)
    # volume method should be used to find pages
    self.assertIn(call(), mockvol.find_solr_pages.call_args_list)
    # volume should be set in context
    # (compare explicitly; a two-argument assert_ would only check that
    # the mock is truthy and use the context value as failure message)
    self.assertEqual(mockvol, response.context['vol'])
    # annotated pages should be empty for anonymous user
    self.assertEqual({}, response.context['annotated_pages'])

    # log in as a regular user
    self.client.login(**self.user_credentials['user'])
    testuser = get_user_model().objects.get(
        username=self.user_credentials['user']['username'])

    page1_url = reverse('books:page',
                        kwargs={'vol_pid': mockvol.pid, 'pid': 'page:1'})
    page2_url = reverse('books:page',
                        kwargs={'vol_pid': mockvol.pid, 'pid': 'page:2'})
    page3_url = reverse('books:page',
                        kwargs={'vol_pid': mockvol.pid, 'pid': 'page:3'})

    # mix of absolutized and as-reversed urls as keys
    mockvol.page_annotation_count.return_value = {
        absolutize_url(page1_url): 5,
        absolutize_url(page2_url): 2,
        page3_url: 13
    }
    response = self.client.get(vol_page_url)
    mockvol.page_annotation_count.assert_called_with(testuser)
    annotated_pages = response.context['annotated_pages']
    # counts should be preserved, and are looked up here under the same
    # keys that page_annotation_count returned
    # NOTE(review): an earlier comment said urls should be non-absolute
    # in the context, but the lookups below use absolutized keys - confirm
    self.assertEqual(5, annotated_pages[absolutize_url(page1_url)])
    self.assertEqual(2, annotated_pages[absolutize_url(page2_url)])
    self.assertEqual(13, annotated_pages[page3_url])
def image_url(self):
    '''Preliminary image url, for use in tei facsimile.

    Built from the ``books:page-image`` url for this page in
    ``fullsize`` mode and passed through ``absolutize_url``.
    '''
    # TODO: we probably want to use some version of the ARK here
    # use the readux url, rather than exposing IIIF url directly
    # (i.e., not unicode(self.iiif))
    url_args = {
        'vol_pid': self.volume.pid,
        'pid': self.pid,
        'mode': 'fullsize',
    }
    image_path = reverse('books:page-image', kwargs=url_args)
    return absolutize_url(image_path)
def get_default_pid(self):
    '''Default pid logic for DigitalObjects in :mod:`readux`.

    When a PID manager client is configured, mint a new ARK via the
    PID manager, add the full ARK to the Dublin Core identifier list,
    and use the noid portion of the ARK for a Fedora pid in the object's
    default pidspace.  When no PID manager client is available, fall
    back to the default pid behavior of the parent class.

    NOTE: when an ARK is requested, ``self.pid`` is set to a placeholder
    pid built from ``PID_TOKEN`` so the target url can be generated.

    :returns: pid in the default pidspace based on the ARK noid, or
        the result of the parent class ``get_default_pid``
    '''
    global pidman

    if pidman is None:
        # if pidmanager is not available, fall back to default pid behavior
        return super(DigitalObject, self).get_default_pid()

    # pidman wants a target for the new pid
    # generate a pidman-ready target for a named view

    # Use the object absolute url method
    # NOTE: this requires that all values used in a url be set
    # (i.e., page objects must have volume pid configured)
    self.pid = '%s:%s' % (self.default_pidspace, self.PID_TOKEN)
    target = self.get_absolute_url()

    # reverse() encodes the PID_TOKEN and the :, so just unquote the url
    # (shouldn't contain anything else that needs escaping)
    target = urllib.unquote(target)

    # reverse() returns a full path - absolutize so we get scheme & server also
    target = absolutize_url(target)
    # pid name is not required, but helpful for managing pids
    pid_name = self.label
    # ask pidman for a new ark in the configured pidman domain
    try:
        ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target, name=pid_name)
    except httplib.BadStatusLine:
        # replace the module-level client and retry once; a failure on
        # the second attempt is not caught here
        logger.warning('Error creating ARK; re-initializing pidman client and trying again')
        pidman = DjangoPidmanRestClient()
        ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target, name=pid_name)

    # pidman returns the full, resolvable ark
    # parse into dictionary with nma, naan, and noid
    parsed_ark = parse_ark(ark)
    noid = parsed_ark['noid']  # nice opaque identifier

    # Add full uri ARK to dc:identifier
    self.dc.content.identifier_list.append(ark)

    # use the noid to construct a pid in the configured pidspace
    return '%s:%s' % (self.default_pidspace, noid)
def get(self, request, *args, **kwargs):
    '''Proxy a GET request to the url from :meth:`get_redirect_url`.

    Conditional request headers sent by the client are passed along
    to the remote server, and the remote status code, headers (apart
    from a few server-specific ones) and content are copied into the
    local response.  In ``info`` mode, a successful JSON response is
    modified so that its ``@id`` is based on the current request path.

    :param request: current http request
    :returns: :class:`HttpResponse` based on the remote response
    '''
    url = self.get_redirect_url(*args, **kwargs)

    # use headers to allow browsers to cache downloaded copies
    headers = {}
    for meta_key in ['HTTP_IF_MODIFIED_SINCE', 'HTTP_IF_UNMODIFIED_SINCE',
                     'HTTP_IF_MATCH', 'HTTP_IF_NONE_MATCH']:
        if meta_key in request.META:
            # django stores request headers in META upper-cased, with
            # hyphens converted to underscores and an HTTP_ prefix;
            # convert back to the real header name (e.g. If-None-Match),
            # since a name with underscores is a different header
            header_name = meta_key[len('HTTP_'):].replace('_', '-').title()
            headers[header_name] = request.META.get(meta_key)
    remote_response = requests.get(url, headers=headers)

    local_response = HttpResponse()
    local_response.status_code = remote_response.status_code

    # include response headers, except for server-specific items
    for header, value in remote_response.headers.iteritems():
        if header not in ['Connection', 'Server', 'Keep-Alive', 'Link']:
            # 'Access-Control-Allow-Origin', 'Link']:
            # FIXME: link header is valuable, but would
            # need to be made relative to current url
            local_response[header] = value

    # special case, for deep zoom (hack)
    # only rewrite the json on a successful response; other responses
    # (e.g. 304 for a conditional request) may not have a json body
    if kwargs['mode'] == 'info' and remote_response.status_code == 200:
        data = remote_response.json()
        # need to adjust the id to be relative to current url
        # this is a hack, patching in a proxy iiif interface at this url
        data['@id'] = absolutize_url(
            request.path.replace('/info/', '/iiif'))
        local_response.content = json.dumps(data)
        # update content-length for change in data
        local_response['content-length'] = len(local_response.content)
        # needed to allow external site (i.e. jekyll export)
        # to use deepzoom
        local_response['Access-Control-Allow-Origin'] = '*'
    else:
        # include response content if any
        local_response.content = remote_response.content

    return local_response
def get(self, request):
    '''Return a JSON description of the annotation store API, with
    links for creating, deleting, reading, and updating annotations
    and for search.'''
    # Include absolute API links as per annotator 2.0 documentation
    # http://docs.annotatorjs.org/en/latest/modules/storage.html#storage-api
    base_url = absolutize_url(reverse('annotation-api:index'))

    # the three distinct urls used by the actions below
    collection_url = "%sannotations" % base_url
    item_url = "%sannotations/:id" % base_url
    search_url = "%ssearch" % base_url

    annotation_links = {
        "create": {
            "desc": "Create a new annotation",
            "method": "POST",
            "url": collection_url
        },
        "delete": {
            "desc": "Delete an annotation",
            "method": "DELETE",
            "url": item_url
        },
        "read": {
            "desc": "Get an existing annotation",
            "method": "GET",
            "url": item_url
        },
        "update": {
            "desc": "Update an existing annotation",
            "method": "PUT",
            "url": item_url
        }
    }
    search_link = {
        "desc": "Basic search API",
        "method": "GET",
        "url": search_url
    }

    api_info = {
        "name": "Annotator Store API",
        "version": "2.0.0",
        "links": {
            "annotation": annotation_links,
            "search": search_link
        }
    }
    return JsonResponse(api_info)
def get(self, request, *args, **kwargs):
    '''Proxy a GET request to the url from :meth:`get_redirect_url`.

    Conditional request headers sent by the client are passed along
    to the remote server, and the remote status code, headers (apart
    from a few server-specific ones) and content are copied into the
    local response.  In ``info`` mode, a successful JSON response is
    modified so that its ``@id`` is based on the current request path.

    :param request: current http request
    :returns: :class:`HttpResponse` based on the remote response
    '''
    url = self.get_redirect_url(*args, **kwargs)

    # use headers to allow browsers to cache downloaded copies
    headers = {}
    for meta_key in ['HTTP_IF_MODIFIED_SINCE', 'HTTP_IF_UNMODIFIED_SINCE',
                     'HTTP_IF_MATCH', 'HTTP_IF_NONE_MATCH']:
        if meta_key in request.META:
            # django stores request headers in META upper-cased, with
            # hyphens converted to underscores and an HTTP_ prefix;
            # convert back to the real header name (e.g. If-None-Match),
            # since a name with underscores is a different header
            header_name = meta_key[len('HTTP_'):].replace('_', '-').title()
            headers[header_name] = request.META.get(meta_key)
    remote_response = requests.get(url, headers=headers)

    local_response = HttpResponse()
    local_response.status_code = remote_response.status_code

    # include response headers, except for server-specific items
    for header, value in remote_response.headers.iteritems():
        if header not in ['Connection', 'Server', 'Keep-Alive', 'Link']:
            # 'Access-Control-Allow-Origin', 'Link']:
            # FIXME: link header is valuable, but would
            # need to be made relative to current url
            local_response[header] = value

    # special case, for deep zoom (hack)
    # only rewrite the json on a successful response; other responses
    # (e.g. 304 for a conditional request) may not have a json body
    if kwargs['mode'] == 'info' and remote_response.status_code == 200:
        data = remote_response.json()
        # need to adjust the id to be relative to current url
        # this is a hack, patching in a proxy iiif interface at this url
        data['@id'] = absolutize_url(
            request.path.replace('/info/', '/iiif'))
        local_response.content = json.dumps(data)
        # update content-length for change in data
        local_response['content-length'] = len(local_response.content)
        # needed to allow external site (i.e. jekyll export)
        # to use deepzoom
        local_response['Access-Control-Allow-Origin'] = '*'
    else:
        # include response content if any
        local_response.content = remote_response.content

    return local_response
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets the annotation id,
    absolute url, and type; slugified tags as ana attribute; username
    as resp attribute; the original markdown; references for related
    pages; and any citations.  Annotation content is converted from
    markdown to TEI.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, tags do not include namespace
    # wrap in a note element and set the default namespace as tei
    note_xml = '<note xmlns="%s">%s</note>' % \
        (teimap.TEI_NAMESPACE, note_content)
    teinote = load_xmlobject_from_string(note_xml, tei.Note)

    # what id do we want? annotation uuid? url?
    teinote.id = 'annotation-%s' % annotation.id  # can't start with numeric
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in @ana
    if 'tags' in annotation.info() and annotation.info()['tags']:
        tag_refs = set('#%s' % slugify(tag.strip())
                       for tag in annotation.info()['tags'])
        teinote.ana = ' '.join(tag_refs)

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark
            tei_page_id = teivol.page_id_by_xlink(rel_page)
            if tei_page_id is not None:
                page_ref.target = '#%s' % tei_page_id
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for citation_xml in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(citation_xml, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_item_id = bibl_struct['xml:id'].split('/')[-1]
                bibl_struct['xml:id'] = 'zotero-%s' % zotero_item_id

            bibl_struct_xml = bibsoup.biblStruct.prettify()
            teibibl = load_xmlobject_from_string(bibl_struct_xml,
                                                 tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote
def volume_url(self, obj):
    '''Generate an absolute, unquoted url to the ``books:volume`` view
    for a volume object.'''
    quoted_path = reverse('books:volume', kwargs={'pid': obj.pid})
    volume_path = urllib.unquote(quoted_path)
    return absolutize_url(volume_path)
def pdf_url(self, obj):
    '''Generate an absolute url to the pdf for a volume object, based
    on the url returned by the object's own ``pdf_url`` method.'''
    volume_pdf_url = obj.pdf_url()
    return absolutize_url(volume_pdf_url)
def generate_volume_tei(self):
    '''Generate TEI for a volume by combining the TEI for all pages.

    The generated TEI is stored in the django cache under a key based
    on the volume pid; if cached TEI is found, it is loaded instead of
    being regenerated.  In either case, the publication statement date
    is set to the current date and the result is saved to the cache.

    :returns: :class:`tei.Facsimile`, or None if the volume does not
        have TEI
    '''
    if not self.has_tei:
        return

    # store volume TEI in django cache, because generating TEI
    # for a large volume is expensive (fedora api calls for each page)
    cache_key = '%s-tei' % self.pid
    vol_tei_xml = cache.get(cache_key, None)
    if vol_tei_xml:
        logger.debug('Loading volume TEI for %s from cache' % self.pid)
        vol_tei = xmlmap.load_xmlobject_from_string(vol_tei_xml,
                                                    tei.Facsimile)

    # if usable tei was not in the cache, generate it
    # (any empty cached value is treated the same as a cache miss, so
    # vol_tei is always defined below)
    else:
        start = time.time()
        vol_tei = tei.Facsimile()
        # populate header information
        vol_tei.create_header()
        vol_tei.header.title = self.title
        # publication statement
        vol_tei.distributor = settings.TEI_DISTRIBUTOR
        vol_tei.pubstmt.distributor_readux = 'Readux'
        vol_tei.pubstmt.desc = 'TEI facsimile generated by Readux version %s' % __version__
        # source description - original publication
        vol_tei.create_original_source()
        vol_tei.original_source.title = self.title
        # original publication date
        if self.date:
            vol_tei.original_source.date = self.date[0]
        # if authors are set, it should be a list
        if self.creator:
            vol_tei.original_source.authors = self.creator
        # source description - digital edition
        vol_tei.create_digital_source()
        vol_tei.digital_source.title = '%s, digital edition' % self.title
        vol_tei.digital_source.date = self.digital_ed_date
        # FIXME: ideally, these would be ARKs, but ARKs for readux volume
        # content do not yet resolve to Readux urls
        vol_tei.digital_source.url = absolutize_url(self.get_absolute_url())
        vol_tei.digital_source.pdf_url = absolutize_url(self.pdf_url())

        # loop through pages and add tei content
        # for page in self.pages[:10]:   # FIXME: temporary, for testing/speed
        page_order = 1
        for page in self.pages:
            if page.tei.exists and page.tei.content.page:
                # include facsimile page *only* from the tei for each page
                # tei facsimile already includes a graphic url
                teipage = page.tei.content.page
                # add a reference from tei page to readux page
                # pages should have ARKS; fall back to readux url if
                # ark is not present (only expected to happen in dev)
                teipage.href = page.ark_uri or absolutize_url(page.get_absolute_url())
                # NOTE: generating ark_uri currently requires loading
                # DC from fedora; could we generate reliably based on the pid?

                # teipage.n = page.page_order
                teipage.n = page_order
                # NOTE: normally we would use page.page_order, but that
                # requires an additional api call for each page
                # to load the rels-ext, so use a local counter instead

                # ensure graphic elements are present for image variants
                # full size, page size, thumbnail, and deep zoom variants
                # NOTE: graphic elements need to come immediately after
                # surface and before zone; adding them before removing
                # existing graphic element should place them correctly.

                # mapping of types we want in the tei and
                # corresponding mode to pass to the url
                image_types = {
                    'full': 'fs',
                    'page': 'single-page',
                    'thumbnail': 'thumbnail',
                    'small-thumbnail': 'mini-thumbnail',
                    'json': 'info',
                }
                for image_type, mode in image_types.iteritems():
                    teipage.graphics.append(tei.Graphic(
                        rend=image_type,
                        url=absolutize_url(reverse(
                            'books:page-image',
                            kwargs={'vol_pid': self.pid, 'pid': page.pid,
                                    'mode': mode}))), )

                # page tei should have an existing graphic reference
                # remove it from our output
                if teipage.graphics[0].rend is None:
                    del teipage.graphics[0]

                vol_tei.page_list.append(teipage)

                # NOTE(review): counter only advances for pages that
                # contribute tei - confirm this is the intended numbering
                page_order += 1

        logger.info('Volume TEI for %s with %d pages generated in %.02fs' % \
            (self.pid, len(self.pages), time.time() - start))

    # update current date for either version (new or cached)
    # store current date (tei generation) in publication statement
    export_date = datetime.now()
    vol_tei.pubstmt.date = export_date
    vol_tei.pubstmt.date_normal = export_date

    # save current volume tei in django cache
    # (3000 is passed as the cache timeout)
    cache.set(cache_key, vol_tei.serialize(), 3000)

    return vol_tei
def absolute_url(self):
    '''Absolute version of the url returned by ``get_absolute_url``,
    for external services or for referencing in annotations.'''
    local_url = self.get_absolute_url()
    return absolutize_url(local_url)
def fulltext_absolute_url(self):
    '''Absolute url to the text view (``books:text``) for this volume,
    for use with external services such as voyant-tools.org'''
    text_path = reverse('books:text', kwargs={'pid': self.pid})
    return absolutize_url(text_path)
def volume_url(self, obj):
    '''Generate an absolute, unquoted url to the ``books:volume`` view
    for a volume object.'''
    quoted_path = reverse('books:volume', kwargs={'pid': obj.pid})
    volume_path = urllib.unquote(quoted_path)
    return absolutize_url(volume_path)
def annotation_to_tei(annotation, teivol):
    '''Generate a tei note from an annotation.  Sets the annotation id,
    absolute url, and type; slugified tags as ana attribute; username
    as resp attribute; the original markdown; references for related
    pages; and any citations.  Annotation content is converted from
    markdown to TEI.

    :param annotation: :class:`~readux.annotations.models.Annotation`
    :param teivol: :class:`~readux.books.tei.AnnotatedFacsimile` tei
        document, for converting related page ARK uris into TEI ids
    :returns: :class:`readux.books.tei.Note`
    '''
    # NOTE: annotation created/edited dates are not included here
    # because they were determined not to be relevant for our purposes

    # sample note provided by Alice
    # <note resp="JPK" xml:id="oshnp50n1" n="1"><p>This is an example note.</p></note>

    # convert markdown-formatted text content to tei
    note_content = markdown_tei.convert(annotation.text)
    # markdown results could be a list of paragraphs, and not a proper
    # xml tree; also, tags do not include namespace
    # wrap in a note element and set the default namespace as tei
    note_xml = '<note xmlns="%s">%s</note>' % \
        (teimap.TEI_NAMESPACE, note_content)
    teinote = load_xmlobject_from_string(note_xml, tei.Note)

    # what id do we want? annotation uuid? url?
    teinote.id = 'annotation-%s' % annotation.id  # can't start with numeric
    teinote.href = absolutize_url(annotation.get_absolute_url())
    teinote.type = 'annotation'

    # if an annotation includes tags, reference them by slugified id in @ana
    if 'tags' in annotation.info() and annotation.info()['tags']:
        tag_refs = set('#%s' % slugify(tag.strip())
                       for tag in annotation.info()['tags'])
        teinote.ana = ' '.join(tag_refs)

    # if the annotation has an associated user, mark the author
    # as responsible for the note
    if annotation.user:
        teinote.resp = annotation.user.username

    # include full markdown of the annotation, as a backup for losing
    # content converting from markdown to tei, and for easy display
    teinote.markdown = annotation.text

    # if annotation contains related pages, generate a link group
    if annotation.related_pages:
        for rel_page in annotation.related_pages:
            page_ref = tei.Ref(text=rel_page, type='related page')
            # find tei page identifier from the page ark
            tei_page_id = teivol.page_id_by_xlink(rel_page)
            if tei_page_id is not None:
                page_ref.target = '#%s' % tei_page_id
            teinote.related_pages.append(page_ref)

    # if annotation includes citations, add them to the tei
    # NOTE: expects these citations to be TEI encoded already (generated
    # by the zotero api and added via meltdown-zotero annotator plugin)
    if annotation.extra_data.get('citations', None):
        for citation_xml in annotation.extra_data['citations']:
            # zotero tei export currently includes an id that is not
            # a valid ncname (contains : and /)
            bibsoup = BeautifulSoup(citation_xml, 'xml')
            # convert xml id into the format we want:
            # zotero-#### (zotero item id)
            for bibl_struct in bibsoup.find_all('biblStruct'):
                zotero_item_id = bibl_struct['xml:id'].split('/')[-1]
                bibl_struct['xml:id'] = 'zotero-%s' % zotero_item_id

            bibl_struct_xml = bibsoup.biblStruct.prettify()
            teibibl = load_xmlobject_from_string(bibl_struct_xml,
                                                 tei.BiblStruct)
            teinote.citations.append(teibibl)

    return teinote