forked from emory-libraries/findingaids
-
Notifications
You must be signed in to change notification settings - Fork 0
/
models.py
858 lines (690 loc) · 35.3 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
# file findingaids/fa/models.py
#
# Copyright 2012 Emory University Library
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
import logging
import os
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.cache import cache
from django.core.urlresolvers import reverse
from django.db import models
from eulxml import xmlmap
from eulxml.xmlmap import eadmap
from eulexistdb.manager import Manager
from eulexistdb.models import XmlModel
from findingaids.utils import normalize_whitespace
# logging
# module-level logger, named after this module
logger = logging.getLogger(__name__)
# finding aid models
# delimiter character between the eadid prefix and the remainder of an id;
# shortform_id splits on this when no eadid is supplied
ID_DELIMITER = '_'
class DigitalArchivalObject(eadmap.DigitalArchivalObject):
    '''Local version of the eulxml ``DigitalArchivalObject`` that additionally
    maps the ``xlink:show`` attribute.'''

    #: attribute to determine how the resource should be displayed
    show = xmlmap.StringField("@xlink:show")
class Name(XmlModel):
    '''XmlObject for a generic name in an EAD document.  Provides the
    functionality shared by persname, famname, corpname, and geogname.
    '''

    ROOT_NS = eadmap.EAD_NAMESPACE
    ROOT_NAMESPACES = {'e': eadmap.EAD_NAMESPACE}

    source = xmlmap.StringField('@source')
    role = xmlmap.StringField('@role')
    authfilenumber = xmlmap.StringField('@authfilenumber')
    encodinganalog = xmlmap.StringField('@encodinganalog')
    value = xmlmap.StringField(".", normalize=True)

    # NOTE: could be persname, corpname, famname

    def _has_tag(self, local_name):
        'True when the underlying node is the EAD element `local_name`.'
        qualified = "{%s}%s" % (eadmap.EAD_NAMESPACE, local_name)
        return self.node.tag == qualified

    @property
    def is_personal_name(self):
        'boolean indicator if this is a persname tag'
        return self._has_tag('persname')

    @property
    def is_corporate_name(self):
        'boolean indicator if this is a corpname tag'
        return self._has_tag('corpname')

    @property
    def is_family_name(self):
        'boolean indicator if this is a famname tag'
        return self._has_tag('famname')

    @property
    def is_geographic_name(self):
        'boolean indicator if this is a geogname tag'
        return self._has_tag('geogname')

    @property
    def uri(self):
        '''URI for this name, built from the source and authfilenumber
        attributes.  Supported sources are viaf, geonames, and dbpedia;
        any other source (or no source) yields None.
        '''
        templates = {
            'viaf': 'http://viaf.org/viaf/%s',
            'geonames': 'http://sws.geonames.org/%s/',
            'dbpedia': 'http://dbpedia.org/resource/%s',
        }
        template = templates.get(self.source)
        if template is None:
            return None
        return template % self.authfilenumber
class FindingAid(XmlModel, eadmap.EncodedArchivalDescription):
    """
    Customized version of :class:`eulxml.EncodedArchivalDescription` EAD object.

    Additional fields and methods are used for search, browse, and display.
    """

    ROOT_NAMESPACES = {
        'e': eadmap.EAD_NAMESPACE,
        'xlink': eadmap.XLINK_NAMESPACE,
        'exist': 'http://exist.sourceforge.net/NS/exist',
        'util': 'http://exist-db.org/xquery/util',
    }
    # redeclaring namespace from eulcore to ensure prefix is correct for xpaths

    # NOTE: overriding these fields from EncodedArchivalDescription to allow
    # for efficiently retrieving unittitle and abstract in the full document OR
    # in the constructed return object returned from eXist for search/browse
    unittitle = xmlmap.NodeField('.//e:unittitle[not(ancestor::e:dsc)]', eadmap.UnitTitle)
    abstract = xmlmap.NodeField('.//e:abstract[not(ancestor::e:dsc)]', xmlmap.XmlObject)
    physical_descriptions = xmlmap.StringListField('.//e:physdesc[not(ancestor::e:dsc)]', normalize=True)

    list_title_xpaths = [
        "e:archdesc/e:did/e:origination/e:corpname",
        "e:archdesc/e:did/e:origination/e:famname",
        "e:archdesc/e:did/e:origination/e:persname",
        "e:archdesc/e:did[not(e:origination/e:corpname or e:origination/e:famname or e:origination/e:persname)]/e:unittitle"
    ]
    list_title_xpath = "|".join("./%s" % xp for xp in list_title_xpaths)
    #./archdesc/did/origination/node()|./archdesc/did[not(origination/node())]/unittitle"

    # field to use for alpha-browse - any origination name, fall back to unit title if no origination
    list_title = xmlmap.NodeField(list_title_xpath, xmlmap.XmlObject)
    "list title used for alphabetical browse - any origination name, or unittitle if there is none"

    # first letter of title field
    first_letter = xmlmap.StringField("substring(normalize-space(%s),1,1)" % list_title_xpath)
    "First letter of list title"

    # NOTE: the multi-part xpaths below are assembled with implicit string
    # concatenation so source indentation never ends up inside the xpath
    dc_subjects = xmlmap.StringListField(
        'e:archdesc//e:controlaccess/e:subject[@encodinganalog = "650"] | '
        'e:archdesc//e:controlaccess/e:persname[@encodinganalog = "600"] | '
        'e:archdesc//e:controlaccess/e:corpname[@encodinganalog = "610"] | '
        'e:archdesc//e:controlaccess/e:corpname[@encodinganalog = "611"] | '
        'e:archdesc//e:controlaccess/e:geogname[@encodinganalog = "651"]',
        normalize=True)
    "control access fields that should be mapped to Dublin Core subject, based on encodinganalog attribute"

    dc_contributors = xmlmap.StringListField(
        'e:archdesc//e:controlaccess/e:persname[@encodinganalog = "700"] | '
        'e:archdesc//e:controlaccess/e:corpname[@encodinganalog = "710"]',
        normalize=True)
    "control access fields that should be mapped to Dublin Core contributor, based on encodinganalog attribute"

    # convenience mapping for searching on subject fields
    subject = xmlmap.StringField('.//e:controlaccess')

    # local repository *subarea* - e.g., MARBL, University Archives
    repository = xmlmap.StringListField('.//e:subarea', normalize=True)

    # boosted fields in the index: must be searched to get proper relevance score
    boostfields = xmlmap.StringField(
        './/e:titleproper | .//e:origination | '
        './/e:abstract | .//e:bioghist | .//e:scopecontent | .//e:controlaccess')

    # temporary manual mapping for processinfo, will be incorporated into a release of eulxml
    process_info = xmlmap.NodeField("e:archdesc/e:processinfo", eadmap.Section)

    # is mapped as single in eulxml.eadmap but could be multiple
    separatedmaterial_list = xmlmap.NodeListField("e:archdesc/e:separatedmaterial", eadmap.Section)
    relatedmaterial_list = xmlmap.NodeListField("e:archdesc/e:relatedmaterial", eadmap.Section)

    # match-count on special groups of data for table of contents listing
    # - administrative info fields
    _admin_info = ['userestrict', 'altformavail', 'relatedmaterial', 'separatedmaterial',
                   'acqinfo', 'custodhist', 'prefercite']
    # -- map as regular xmlmap field, for use when entire object is returned
    admin_info_matches = xmlmap.IntegerField(
        'count(./e:archdesc/*[' +
        '|'.join(['self::e:%s' % field for field in _admin_info]) + ']//exist:match)')
    # -- eXist-specific xpath for returning count without entire document
    admin_info_matches_xpath = 'count(util:expand(%(xq_var)s/e:archdesc/(' + \
        '|'.join(['e:%s' % field for field in _admin_info]) + '))//exist:match)'

    # - collection description fields
    _coll_desc = ['bioghist', 'bibliography', 'scopecontent', 'arrangement', 'otherfindaid']
    # -- map as regular xmlmap field, for use when entire object is returned
    coll_desc_matches = xmlmap.IntegerField(
        'count(' + '|'.join('./e:archdesc/e:%s//exist:match' % field for field in _coll_desc) + ')')
    # -- eXist-specific xpath for returning count without entire document
    coll_desc_matches_xpath = 'count(util:expand(%(xq_var)s/e:archdesc/(' + \
        '|'.join('e:%s' % field for field in _coll_desc) + '))//exist:match)'

    # - controlaccess match-count
    controlaccess_matches_xpath = 'count(util:expand(%(xq_var)s/e:archdesc/e:controlaccess)//exist:match)'

    origination_name = xmlmap.NodeField('e:archdesc/e:did/e:origination/e:*', Name)
    'origination name, as an instance of :class:`Name`'

    # dao anywhere in the ead, to allow filtering on finding aids with daos
    daos = xmlmap.NodeListField('.//e:dao', DigitalArchivalObject)

    #: count of public dao elements in a record, to support search filtering
    #: "public" is defined as audience external or not set, xlink:href present,
    #: and show not set to none.
    public_dao_count = xmlmap.IntegerField('count(.//e:dao[@xlink:href][not(@xlink:show="none")][not(@audience) or @audience="external"])')

    objects = Manager('/e:ead')
    """:class:`eulcore.django.existdb.manager.Manager` - similar to an object manager
    for django db objects, used for finding and retrieving FindingAid objects
    in eXist.

    Configured to use */ead* as base search path.
    """

    @property
    def has_digital_content(self):
        'boolean to indicate whether or not this EAD includes public digital content'
        # NOTE: if using partial xml return, requires that public_dao_count is included
        return self.public_dao_count >= 1

    def admin_info(self):
        """
        Generate a list of administrative information fields from the archive description.
        Only includes MARBL-designated fields to be displayed as administrative information,
        in a specified order.  Any fields not present in the finding aid will
        not be included in the list returned.

        These fields are included, in this order:
        access restrictions, use restrictions, alternate form, related material,
        separated material, acquisition info, custodial history, preferred citation,
        processing information

        :rtype: list of :class:`eulcore.xmlmap.eadmap.Section`
        """
        info = []
        if self.archdesc.access_restriction:
            info.append(self.archdesc.access_restriction)
        if self.archdesc.use_restriction:
            info.append(self.archdesc.use_restriction)
        if self.archdesc.alternate_form:
            info.append(self.archdesc.alternate_form)
        # related and separated material may occur multiple times
        for rel_m in self.relatedmaterial_list:
            info.append(rel_m)
        for sep_m in self.separatedmaterial_list:
            info.append(sep_m)
        if self.archdesc.acquisition_info:
            info.append(self.archdesc.acquisition_info)
        if self.archdesc.custodial_history:
            info.append(self.archdesc.custodial_history)
        if self.archdesc.preferred_citation:
            info.append(self.archdesc.preferred_citation)
        if self.process_info:
            info.append(self.process_info)
        return info

    def collection_description(self):
        """
        Generate a list of collection description fields from the archive description.
        Only includes MARBL-designated fields to be displayed as collection description,
        in a specified order.  Any fields not present in the finding aid will
        not be included in the list returned.

        These fields are included, in this order:
        biography/history, bibliography, scope & content, arrangement, other finding aid

        :rtype: list of :class:`eulcore.xmlmap.eadmap.Section`
        """
        fields = []
        if self.archdesc.biography_history:
            fields.append(self.archdesc.biography_history)
        if self.archdesc.bibliography:
            fields.append(self.archdesc.bibliography)
        if self.archdesc.scope_content:
            fields.append(self.archdesc.scope_content)
        if self.archdesc.arrangement:
            fields.append(self.archdesc.arrangement)
        if self.archdesc.other:
            fields.append(self.archdesc.other)
        return fields

    def dc_fields(self):
        """
        Generate a dictionary of Dublin Core fields from an EAD.
        Dictionary key: base name of a Dublin Core field (e.g., creator, title)
        Dictionary value: a set of values corresponding to the DC field.
        Note that some keys may have empty sets as their values.

        :rtype: dict
        """
        fields = dict()
        fields["title"] = set([self.title, self.unittitle])
        fields["creator"] = set([name for name in [self.archdesc.origination] if name])
        fields["publisher"] = set([self.file_desc.publication.publisher])
        fields["date"] = set([date.normalized for date in [self.profiledesc.date] if date])
        fields["language"] = set(self.profiledesc.language_codes)
        fields["subject"] = set(self.dc_subjects)
        fields["contributor"] = set(self.dc_contributors)
        fields["identifier"] = set([self.eadid.url])
        return fields

    collection_id = xmlmap.StringField('e:archdesc/e:did/e:unitid/@identifier')

    def collection_uri(self):
        '''URI to use in RDF for the archival collection, as distinguished
        from the findingaid document that describes the collection and
        the materials that it includes.'''
        if self.collection_id is not None and \
           self.collection_id.startswith('http'):
            # if collection id is a URL, use that
            return self.collection_id
        else:
            # otherwise use findingaid ARK as base for collection URI
            return '%s#collection' % self.eadid.url

    def absolute_eadxml_url(self):
        ''' Generate an absolute url to the xml view for this ead
        for use with external services such as Aeon'''
        current_site = Site.objects.get_current()
        return ''.join([
            'http://',
            current_site.domain.rstrip('/'),
            reverse('fa:eadxml', kwargs={"id": self.eadid.value})
        ])

    def request_materials_url(self):
        ''' Construct the absolute url for use with external services such as Aeon.

        :returns: the configured ``REQUEST_MATERIALS_URL`` followed by the
            absolute EAD xml url, or None (after logging a warning) when
            the setting is missing or empty
        '''
        base = getattr(settings, 'REQUEST_MATERIALS_URL', None)
        if not base:
            # logger.warn is a deprecated alias; use logger.warning
            logger.warning("Request materials url is not configured.")
            return None
        return ''.join([base, self.absolute_eadxml_url()])

    def requestable(self):
        ''' Determines if the EAD is applicable for the electronic request service.

        :returns: True only when both ``REQUEST_MATERIALS_URL`` and
            ``REQUEST_MATERIALS_REPOS`` are configured and at least one of
            this finding aid's repositories is listed in the latter
        '''
        # If the request url is not configured, then the request can't be generated.
        if not getattr(settings, 'REQUEST_MATERIALS_URL', None):
            return False
        allowed_repos = getattr(settings, 'REQUEST_MATERIALS_REPOS', None)
        if not allowed_repos:
            return False
        # If the item is in one of the libraries defined, then it should be displayed.
        return any(normalize_whitespace(repo) in allowed_repos
                   for repo in self.repository)
class ListTitle(XmlModel):
    '''EAD list title, queried at the title level (rather than the full
    document level) for better query response.'''

    ROOT_NAMESPACES = {'e': eadmap.EAD_NAMESPACE}

    # xpath to use for alpha-browse: the FindingAid list title xpaths,
    # each one searched for anywhere in the database
    xpath = "|".join("//" + title_xpath
                     for title_xpath in FindingAid.list_title_xpaths)

    # generic item field is used here to avoid string() conversion
    first_letter = xmlmap.ItemField("substring(.,1,1)")
    "First letter of a finding aid list title: use to generate list of first-letters for browse."

    objects = Manager(xpath)
def title_letters():
    """Cached list of distinct, sorted first letters present in all Finding Aid titles.

    On a cache miss the letters are queried via :class:`ListTitle`, stored
    in the cache with the configured default cache timeout, and returned.

    :returns: list of first-letter query results
    """
    cache_key = 'browse-title-letters'
    letters = cache.get(cache_key)
    if letters is None:
        query = ListTitle.objects.only('first_letter').order_by('first_letter').distinct()
        letters = list(query)
        cache.set(cache_key, letters)  # use configured cache timeout
    # return the local value rather than re-reading the cache: a second
    # cache.get could come back None if the entry was not actually stored
    return letters
class EadRepository(XmlModel):
    '''Owning repository (EAD ``subarea`` element) of a finding aid.'''

    ROOT_NAMESPACES = {'e': eadmap.EAD_NAMESPACE}

    # whitespace-normalized text of the subarea element
    normalized = xmlmap.StringField('normalize-space(.)')

    objects = Manager('//e:subarea')

    @staticmethod
    def distinct():
        """Cached list of distinct owning repositories in all Finding Aids.

        On a cache miss the repositories are queried, stored in the cache
        with the configured default cache timeout, and returned.
        """
        cache_key = 'findingaid-repositories'
        repos = cache.get(cache_key)
        if repos is None:
            # using normalized version because whitespace is inconsistent in this field
            repos = list(EadRepository.objects.only('normalized').distinct())
            cache.set(cache_key, repos)  # use configured default cache timeout
        # return the local value rather than re-reading the cache: a second
        # cache.get could come back None if the entry was not actually stored
        return repos
class LocalComponent(eadmap.Component):
    '''Extend default :class:`eulcore.xmlmap.eadmap.Component` class to add a
    method to detect first file-item in a list.  (Needed for container list display
    in templates).'''

    ROOT_NAMESPACES = {
        'e': eadmap.EAD_NAMESPACE,
    }

    # by local convention, section headers are sibling components with no containers
    preceding_files = xmlmap.NodeListField('preceding-sibling::node()[@level="file"][e:did/e:container]', "self")

    @property
    def first_file_item(self):
        'Boolean: True if this component is the first file item in a series/container list'
        # coerce to bool so callers always get True/False (the bare
        # ``and`` expression yields the int 0 when there are no containers)
        return bool(len(self.did.container) and len(self.preceding_files) == 0)
def title_rdf_identifier(src, idno):
    '''Generate an RDF identifier for a title, based on source and id
    attributes.  Currently supports ISSN, ISBN, and OCLC.

    :param src: source of the identifier (case-insensitive), e.g. ``isbn``
    :param idno: the identifier itself; surrounding whitespace is ignored
    :returns: a URN (ISBN/ISSN) or WorldCat URL (OCLC); None if either
        value is missing or empty, or the source is not supported
    '''
    # titles without source/authfilenumber attributes pass None here
    if not src or not idno:
        return None
    src = src.lower()
    idno = idno.strip()  # remove whitespace, just in case of errors in entry
    if not idno:
        return None
    if src in ('isbn', 'issn'):  # isbn and issn URNs have same format
        return 'urn:%s:%s' % (src.upper(), idno)
    elif src == 'oclc':
        return 'http://www.worldcat.org/oclc/%s' % idno
    return None
class Title(xmlmap.XmlObject):
    '''An EAD title element, exposing its type, render, source, and
    authfilenumber attributes along with the normalized title text.
    '''

    ROOT_NAME = 'title'
    ROOT_NAMESPACES = {'e': eadmap.EAD_NAMESPACE}

    type = xmlmap.StringField('@type')
    render = xmlmap.StringField('@render')
    source = xmlmap.StringField('@source')
    authfilenumber = xmlmap.StringField('@authfilenumber')
    value = xmlmap.StringField('.', normalize=True)

    @property
    def rdf_identifier(self):
        '''RDF identifier for this title, as calculated by
        :meth:`title_rdf_identifier` from the source and authfilenumber
        attributes.'''
        id_source = self.source
        id_number = self.authfilenumber
        return title_rdf_identifier(id_source, id_number)
class Series(XmlModel, LocalComponent):
    """
    Top-level (c01) series.

    Customized version of :class:`eulcore.xmlmap.eadmap.Component`
    """

    ROOT_NAMESPACES = {
        'e': eadmap.EAD_NAMESPACE,
        'exist': 'http://exist.sourceforge.net/NS/exist'
    }

    ead = xmlmap.NodeField("ancestor::e:ead", FindingAid)
    ":class:`findingaids.fa.models.FindingAid` access to ancestor EAD element"
    # NOTE: using [1] to get closest ancestor; in case of existdb 'also'
    # return result, possible to have nested ead elements.
    # NOTE(review): the xpath above has no [1] predicate - confirm whether
    # this note is stale or the predicate was dropped unintentionally

    parent = xmlmap.NodeField("parent::node()", "self")

    objects = Manager('//e:c01')
    # NOTE: this element should not be restricted by level=series because eXist full-text indexing
    # is more efficient if the queried element matches the indexed element
    """:class:`eulcore.django.existdb.manager.Manager` - similar to an object manager
    for django db objects, used for finding and retrieving c01 series objects
    in eXist.

    Configured to use *//c01* as base search path.
    """

    # temporary manual mapping for processinfo, should be incorporated into a release of eulxml
    process_info = xmlmap.NodeField("e:processinfo", eadmap.Section)

    match_count = xmlmap.IntegerField("count(.//exist:match)")
    "number of keyword matches (``exist:match`` elements) within this series"

    _unittitle_name_xpath = '|'.join('e:did/e:unittitle/e:%s' % t
                                     for t in ['persname', 'corpname', 'geogname'])

    #: tagged name in the unittitle; used for RDFa output
    unittitle_name = xmlmap.NodeField(_unittitle_name_xpath, Name)
    'name in the unittitle, as an instance of :class:`Name`'

    #: list of tagged names in the unittitle; used for RDFa output
    unittitle_names = xmlmap.NodeListField(_unittitle_name_xpath, Name)
    'names in the unittitle, as a list of :class:`Name`'

    #: list of titles in the unittitle; used for RDFa output
    unittitle_titles = xmlmap.NodeListField('e:did/e:unittitle/e:title', Title)
    'list of titles in the unittitle, as :class:`Title`'

    #: title of an ancestor series;
    #: used to determine what kind of content this is, for default rdf type
    # NOTE(review): when more than one ancestor matches, which unittitle is
    # used depends on how the union result is ordered - confirm that the
    # intended (nearest) series title is the one returned
    series_title = xmlmap.StringField(
        '''ancestor::e:c03[@level="subsubseries"]/e:did/e:unittitle
        |ancestor::e:c02[@level="subseries"]/e:did/e:unittitle
        |ancestor::e:c01[@level="series"]/e:did/e:unittitle''',
        normalize=True)

    def series_info(self):
        """
        Generate a list of series information fields.
        Only includes MARBL-designated fields to be displayed as part of series
        description, in a specified order.  Any fields not present in the finding
        aid will not be included in the list returned.

        These fields are included, in this order:
        biography/history, scope & content, arrangement, other finding aid,
        use restrictions, access restrictions, alternate form, location of original,
        bibliography, related material, processing information

        :rtype: list of :class:`eulcore.xmlmap.eadmap.Section`
        """
        fields = []
        if self.biography_history:
            fields.append(self.biography_history)
        if self.scope_content:
            fields.append(self.scope_content)
        if self.arrangement:
            fields.append(self.arrangement)
        if self.other:
            fields.append(self.other)
        if self.use_restriction:
            fields.append(self.use_restriction)
        if self.access_restriction:
            fields.append(self.access_restriction)
        if self.alternate_form:
            fields.append(self.alternate_form)
        if self.originals_location:
            fields.append(self.originals_location)
        if self.bibliography:
            fields.append(self.bibliography)
        if self.related_material:
            fields.append(self.related_material)
        if self.process_info:
            fields.append(self.process_info)
        return fields

    def display_label(self):
        "Series display label - *unitid : unittitle* (if unitid) or *unittitle* (if no unitid)"
        return ': '.join([unicode(u) for u in [self.did.unitid, self.did.unittitle] if u])

    # cached short-form id; calculated on first access of short_id
    _short_id = None

    @property
    def short_id(self):
        "Short-form id (without eadid prefix) for use in external urls."
        if self._short_id is None:
            # get eadid, if available
            if hasattr(self, 'ead') and hasattr(self.ead, 'eadid') and self.ead.eadid.value:
                eadid = self.ead.eadid.value
            else:
                eadid = None
            self._short_id = shortform_id(self.id, eadid)
        return self._short_id

    @property
    def has_semantic_data(self):
        '''Does this item contains semantic data that should be rendered with
        RDFa?  Currently checks the unittitle for a tagged person, corporate, or
        geographic name or for a title with source and authfilenumber attributes
        or with a type.'''
        # - if there is at least one tagged name in the unittitle
        semantic_tags = [self.unittitle_name]
        # - if there are titles tagged with source/authfilenumber OR titles
        #   with a type
        # NOTE: eventually, we will probably want to include all tagged titles,
        # but for now, restrict to titles that have been enhanced in a particular way
        semantic_tags.extend([t for t in self.unittitle_titles
                              if (t.source and t.authfilenumber) or t.type])
        return any(semantic_tags)

    @property
    def contains_correspondence(self):
        'Boolean property indicating if this series contains correspondence.'
        return 'correspondence' in unicode(self.did.unittitle).lower()

    @property
    def rdf_type(self):
        '''rdf type to use for a semantically-tagged component item; None
        if the unittitle has neither titles nor a name with a role of
        ``dc:creator``.'''
        # NOTE: initial implementation for Belfast Group sheets assumes manuscript
        # type; should be refined for other types of content
        rdf_type = None
        if self.unittitle_titles:
            # if type of first title is article, return article
            if self.unittitle_titles[0].type and \
               self.unittitle_titles[0].type.lower() == 'article':
                rdf_type = 'bibo:Article'

            # if two titles and the second has an issn, article in a periodical
            # (TODO: is this close enough for all cases?)
            elif len(self.unittitle_titles) == 2 and self.unittitle_titles[1].source \
                    and self.unittitle_titles[1].source.upper() == 'ISSN':
                rdf_type = 'bibo:Article'

            # if title has an isbn, assume it is a book
            # - for now, also assume OCLC source is book (FIXME: is this accurate?)
            elif self.unittitle_titles[0].source \
                    and self.unittitle_titles[0].source.upper() in ['ISBN', 'OCLC']:
                rdf_type = 'bibo:Book'

            else:
                rdf_type = self.generic_rdf_type_by_series()

        # if there are no titles but there is a name with a role of creator,
        # the component describes some kind of entity, so set the type
        # based on series
        elif self.unittitle_names and 'dc:creator' in [n.role for n in self.unittitle_names]:
            rdf_type = self.generic_rdf_type_by_series()

        return rdf_type

    def generic_rdf_type_by_series(self):
        '''Calculate a generic RDF type based on the series an item belongs to.
        Using bibo:Document for printed material, bibo:Image for photographs,
        bibo:AudioVisualDocument for audio/visual materials, with a fallback
        of bibo:Manuscript.'''
        if self.series_title:
            series_title = self.series_title.lower()
            # if in a Printed Material series, assume bibo:Document
            # - printed material is usually included in text for series and
            #   subseries names
            if 'printed material' in series_title:
                return 'bibo:Document'

            # if in a Photographs series, use bibo:Image
            elif 'photograph' in series_title:
                return 'bibo:Image'

            # if in an AudioVisual series, use bibo:AudioVisualDocument
            elif 'audiovisual' in series_title or \
                    'audio recordings' in series_title or \
                    'video recordings' in series_title:
                # audiovisual usually used at top-level, audio/video rec. used for subseries
                return 'bibo:AudioVisualDocument'

        # otherwise, use bibo:Manuscript
        return 'bibo:Manuscript'

    @property
    def rdf_identifier(self):
        '''RDF identifier of the first title in the unittitle, for use in
        constructing RDFa in the templates; None if there are no titles.'''
        # for now, assuming that the first title listed is the *thing*
        # in the collection.  If we can generate an id for it (i.e.,
        # it has a source & authfilenumber), use that
        if self.unittitle_titles:
            return self.unittitle_titles[0].rdf_identifier

        # NOTE: previously, was only returning an rdf identifier for a
        # single title
        return None

    @property
    def rdf_mentions(self):
        '''Truthy when the unittitle has names or secondary titles that
        should also be related to the collection.'''
        # names related to the title that should also be related to the collection
        # titles after the first two need to be handled separately here also
        return (self.rdf_type is not None and len(self.unittitle_names)) \
            or len(self.unittitle_titles) > 1

    @property
    def mention_titles(self):
        '''List of secondary titles (all but the first) that should be
        mentioned; empty when there is at most one title or when any
        unittitle name has a role.'''
        # if we have a multiple titles with an author, the titles
        # are being treated as a list and should not be exposed
        # (i.e., belfast group sheets)
        if (self.unittitle_names and any(n.role for n in self.unittitle_names)) \
                or len(self.unittitle_titles) <= 1:
            return []
        else:
            # return all but the first title
            return list(self.unittitle_titles)[1:]
# override component.c node_class
# subcomponents need to be initialized as Series to get display_label, series list...
# FIXME: look for a better way to do this kind of XmlObject extension
# NOTE: these assignments run at import time and modify the field
# definitions on the eulxml eadmap classes themselves
eadmap.Component._fields['c'].node_class = Series
eadmap.SubordinateComponents._fields['c'].node_class = Series
# override DigitalArchivalObject with local version
eadmap.DescriptiveIdentification._fields['dao_list'].node_class = DigitalArchivalObject
eadmap.Component._fields['dao_list'].node_class = DigitalArchivalObject
eadmap.ArchivalDescription._fields['dao_list'].node_class = DigitalArchivalObject
def shortform_id(id, eadid=None):
    """Calculate a short-form id (without eadid prefix) for use in external urls.
    Uses eadid if available; otherwise, relies on the id delimiter character.

    :param id: id to be shortened
    :param eadid: eadid prefix, if available
    :returns: short-form id
    :raises Exception: if no eadid is given and the id does not contain
        the id delimiter
    """
    # if eadid is available, use that (should be the most reliable way to shorten id)
    if eadid:
        prefix = '%s%s' % (eadid, ID_DELIMITER)
        # only strip a leading prefix; never touch later occurrences
        if id.startswith(prefix):
            return id[len(prefix):]
        return id

    # if eadid is not available, split on the first delimiter and return the
    # latter portion (maxsplit=1 so ids containing several delimiters do not
    # break the split)
    if ID_DELIMITER in id:
        return id.split(ID_DELIMITER, 1)[1]

    # this shouldn't happen - one of the above two options should work
    raise Exception("Cannot calculate short id for %s" % id)
class Series2(Series):
    """
    Subseries at the c02 level.

    Extends :class:`Series`, which is itself a customized version of
    :class:`eulcore.xmlmap.eadmap.Component`.
    """

    #: :class:`findingaids.fa.models.Series` - the c01 series this
    #: subseries belongs to
    series = xmlmap.NodeField("parent::e:c01", Series)

    #: :class:`eulcore.django.existdb.manager.Manager` configured to use
    #: *//c02* as base search path
    objects = Manager('//e:c02')
class Series3(Series):
    """
    Sub-subseries at the c03 level.

    Extends :class:`Series`, which is itself a customized version of
    :class:`eulcore.xmlmap.eadmap.Component`.
    """

    #: :class:`findingaids.fa.models.Series2` - the c02 subseries this
    #: sub-subseries belongs to
    series2 = xmlmap.NodeField("parent::e:c02", Series2)

    #: :class:`findingaids.fa.models.Series` - the c01 series this
    #: sub-subseries belongs to
    series = xmlmap.NodeField("ancestor::e:c01", Series)

    #: :class:`eulcore.django.existdb.manager.Manager` configured to use
    #: *//c03* as base search path
    objects = Manager('//e:c03')
class Index(XmlModel, eadmap.Index):
    """
    EAD Index, with index entries.

    Customized version of :class:`eulcore.xmlmap.eadmap.Index`
    """

    ROOT_NAMESPACES = {
        'e': eadmap.EAD_NAMESPACE,
        'xlink': eadmap.XLINK_NAMESPACE,
        'exist': 'http://exist.sourceforge.net/NS/exist'
    }

    #: :class:`findingaids.fa.models.FindingAid` access to ancestor EAD element
    ead = xmlmap.NodeField("ancestor::e:ead", FindingAid)

    parent = xmlmap.NodeField("parent::node()", "self")

    #: :class:`eulcore.django.existdb.manager.Manager` - similar to an object
    #: manager for django db objects, used for finding and retrieving index
    #: objects in eXist.  Configured to use *//index* as base search path.
    objects = Manager('//e:index')

    match_count = xmlmap.IntegerField("count(.//exist:match)")

    # cached short-form id; filled in the first time short_id is read
    _short_id = None

    @property
    def short_id(self):
        "Short-form id (without eadid prefix) for use in external urls."
        if self._short_id is not None:
            return self._short_id

        # use the eadid when it can be reached and has a value
        ead_identifier = None
        if hasattr(self, 'ead') and hasattr(self.ead, 'eadid'):
            ead_identifier = self.ead.eadid.value or None

        self._short_id = shortform_id(self.id, ead_identifier)
        return self._short_id
# use the local Index class for the index field on the archival description
# FIXME: look for a better way to do this kind of XmlObject extension
eadmap.ArchivalDescription._fields['index'].node_class = Index
class FileComponent(XmlModel, eadmap.Component):
    """
    Any EAD component with a level of *file*, with item-level information (box &
    folder contents).
    """

    ROOT_NAMESPACES = {
        'e': eadmap.EAD_NAMESPACE,
        'xlink': eadmap.XLINK_NAMESPACE,
        'exist': 'http://exist.sourceforge.net/NS/exist'
    }

    #: :class:`findingaids.fa.models.FindingAid` access to ancestor EAD element
    ead = xmlmap.NodeField("ancestor::e:ead", FindingAid)

    # NOTE: mapping parent, series1, and series2 to ensure there is enough
    # information to generate a link to the series a FileComponent belongs to

    #: :class:`findingaids.fa.models.Series` series this file belongs to
    #: (could be c01, c02, or c03).
    parent = xmlmap.NodeField("parent::node()", Series)

    #: :class:`findingaids.fa.models.Series` c01 series this file belongs to.
    series1 = xmlmap.NodeField("ancestor::e:c01", Series)

    #: :class:`findingaids.fa.models.Series` c02 series this file belongs to, if any.
    series2 = xmlmap.NodeField("ancestor::e:c02", Series)

    #: count of public daos; same as in :attr:`FindingAid.public_dao_count`
    public_dao_count = xmlmap.IntegerField(
        'count(.//e:dao[@xlink:href][not(@xlink:show="none")][not(@audience) or @audience="external"])')

    #: :class:`eulcore.django.existdb.manager.Manager` - similar to an object
    #: manager for django db objects, used for finding and retrieving
    #: c-series file objects in eXist.  Configured to find any c-series (1-4)
    #: with a level of file.
    objects = Manager('(e:ead//e:c01|e:ead//e:c02|e:ead//e:c03|e:ead//e:c04)[@level="file"]')
class Deleted(models.Model):
    """
    Information about a previously published finding aid that has been deleted.
    """
    eadid = models.CharField('EAD Identifier', max_length=50, unique=True)
    title = models.CharField(max_length=200)
    # pass the callable (not its result) so the default is computed each
    # time a record is created, instead of once when this module is imported
    date = models.DateTimeField('Date removed', default=datetime.now)
    note = models.CharField(
        max_length=400, blank=True,
        help_text="Optional: Enter the reason this document is being deleted. " +
                  "These comments will be displayed to anyone who had the finding " +
                  "aid bookmarked and returns after it is gone.")

    class Meta:
        verbose_name = 'Deleted Record'

    def __unicode__(self):
        return self.eadid
class Archive(models.Model):
    '''An archive associated with EAD documents.  Used for admin user
    permissions and to identify the subversion repository that content
    will be published from.'''

    label = models.CharField(
        max_length=10,
        help_text='Short label to identify an archive')
    name = models.CharField(
        max_length=255,
        help_text='repository name (subarea) in EAD to identify finding aids associated with this archive')
    svn = models.URLField(
        'Subversion Repository',
        help_text='URL to subversion repository containing EAD for this archive')
    slug = models.SlugField(
        help_text='shorthand id\n'
                  '(auto-generated from label; do not modify after initial archive definition)')

    def __unicode__(self):
        return self.label

    @property
    def svn_local_path(self):
        'Path built by joining ``settings.SVN_WORKING_DIR`` with the slug.'
        working_dir = settings.SVN_WORKING_DIR
        return os.path.join(working_dir, self.slug)