def ingest_form(request):
    """Show the file ingest form, or ingest an uploaded file.

    Any non-POST request gets an unbound form (optionally with the
    collection pre-selected from the ``collection`` URL parameter).  A
    valid POST stores the uploaded file in a new digital object and
    redirects to the site index; an invalid POST re-renders the form
    with its errors.
    """
    if request.method != 'POST':
        # pre-select the collection on the form when one was requested
        # via url parameters
        if 'collection' in request.GET:
            preselected = {'collection': request.GET['collection']}
        else:
            preselected = {}
        form = IngestForm(initial=preselected)
    else:
        form = IngestForm(request.POST, request.FILES)
        if form.is_valid():
            # TODO: set file mimetype in dc:format
            # TODO: file checksum?
            repo = Repository(request=request)
            new_obj = repo.get_object(type=FileObject)

            # relate the new object to the collection chosen on the form
            membership = (
                new_obj.uriref,
                relsext.isMemberOfCollection,
                URIRef(form.cleaned_data['collection']),
            )
            new_obj.rels_ext.content.add(membership)

            new_obj.master.content = request.FILES['file']
            # the uploaded filename doubles as object label and dc:title
            upload_name = request.FILES['file'].name
            new_obj.label = upload_name
            new_obj.dc.content.title = upload_name
            new_obj.save('ingesting user content')

            view_url = reverse('file:view', args=[new_obj.pid])
            messages.success(
                request,
                'Successfully ingested <a href="%s"><b>%s</b></a>' %
                (view_url, new_obj.pid))
            return HttpResponseSeeOtherRedirect(reverse('site-index'))

    return render_to_response('file/ingest.html', {'form': form},
                              request=request)
def browse(request):
    """Browse postcards and display thumbnail images.

    Results are restricted to objects whose relation matches
    ``settings.RELATION``, optionally narrowed by a ``subject`` URL
    parameter, and paginated 15 per page.  A non-numeric ``page``
    parameter falls back to the first page; an out-of-range page falls
    back to the last page of results.
    """
    repo = Repository()
    repo.default_object_type = ImageObject
    # TEMPORARY: restrict to postcards by relation
    # NOTE: tests rely somewhat on this restriction...
    search_opts = {'relation': settings.RELATION}
    number_of_results = 15
    context = {}

    if 'subject' in request.GET:
        context['subject'] = request.GET['subject']
        search_opts['subject'] = request.GET['subject']

    postcards = repo.find_objects(**search_opts)
    postcard_paginator = Paginator(list(postcards), number_of_results)

    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1

    # If page request (9999) is out of range, deliver last page of results.
    try:
        postcard_page = postcard_paginator.page(page)
    except (EmptyPage, InvalidPage):
        # use this view's own paginator for the fallback; the bare name
        # `paginator` is not defined in this function
        postcard_page = postcard_paginator.page(postcard_paginator.num_pages)

    context['postcards_paginated'] = postcard_page

    return render_to_response('postcards/browse.html', context,
                              context_instance=RequestContext(request))
def _load_postcard(self, label, description, subjects, filename):
    '''Create a postcard object and load to fedora.

    The saved object is appended to ``self.postcards``.

    :param label: object label and dc:title
    :param description: object dc:description; passed to
        ``description_list.extend``, so presumably a list of strings
        -- TODO confirm against callers
    :param subjects: list of subjects to be set in dc:subject
    :param filename: filename for image content, assumed relative to
        the fixture directory (``fixture_path``)
    '''
    # NOTE: image object init here somewhat redundant with current postcard ingest logic
    repo = Repository()
    obj = repo.get_object(type=ImageObject)
    obj.label = label
    obj.owner = settings.FEDORA_OBJECT_OWNERID
    obj.dc.content.title = obj.label
    obj.dc.content.description_list.extend(description)
    obj.dc.content.subject_list.extend(subjects)
    # common DC for all postcards
    obj.dc.content.type = 'image'
    # FIXME: configure this somewhere?
    obj.dc.content.relation_list.extend([settings.RELATION,
                                         'http://beck.library.emory.edu/greatwar/'])

    # Image data is binary, so open in 'rb'; keep the file open until the
    # object has been saved, then make sure the handle is released.
    with open(path.join(fixture_path, filename), 'rb') as image_file:
        # set file as content of image datastream
        obj.image.content = image_file
        # add relation to postcard collection
        obj.rels_ext.content.add((
            URIRef(obj.uri),
            URIRef(MEMBER_OF_COLLECTION),
            URIRef(PostcardCollection.get().uri),
        ))
        obj.save()

    self.postcards.append(obj)
def view_metadata(request, pid):
    """Render the view page for the FileObject identified by pid.

    Raises Http404 when the object does not exist (or the current user
    lacks sufficient permissions to know that it exists).
    """
    file_obj = Repository(request=request).get_object(pid, type=FileObject)
    if file_obj.exists:
        return render_to_response('file/view.html', {'obj': file_obj},
                                  request=request)
    # nonexistent, or not visible to this user
    raise Http404
def edit_metadata(request, pid):
    """View to edit the metadata for an existing
    :class:`~genrepo.file.models.FileObject` .

    On POST with valid form data, updates the object and redirects to
    the site index.  On any other request method (GET, HEAD, ...),
    displays the form populated from the object's current Dublin Core.
    If saving fails, the form is re-rendered with an error message and
    the repository's error code (when available) is used as the HTTP
    status of the response.
    """
    status_code = None
    repo = Repository(request=request)
    # get the object to be edited
    obj = repo.get_object(pid, type=FileObject)

    if request.method == 'POST':
        # update DC from form data (if valid) and save
        form = DublinCoreEditForm(request.POST, instance=obj.dc.content)
        if form.is_valid():
            form.update_instance()
            # also use dc:title as object label
            obj.label = obj.dc.content.title
            try:
                obj.save('updated metadata')
                messages.success(
                    request,
                    'Successfully updated <a href="%s"><b>%s</b></a>' %
                    (reverse('file:view', args=[obj.pid]), obj.pid))
                # maybe redirect to file view page when we have one
                return HttpResponseSeeOtherRedirect(reverse('site-index'))
            except (DigitalObjectSaveFailure, RequestFailed) as rf:
                # do we need a different error message for DigitalObjectSaveFailure?
                if isinstance(rf, PermissionDenied):
                    msg = 'You don\'t have permission to modify this object in the repository.'
                else:
                    msg = 'There was an error communicating with the repository.'
                messages.error(request,
                               msg + ' Please contact a site administrator.')
                # pass the fedora error code back in the http response
                status_code = getattr(rf, 'code', None)
        # if form is not valid, fall through and re-render the form with errors
    else:
        # Every non-POST method gets the unbound form, so `form` is
        # always defined by the time the template is rendered.
        form = DublinCoreEditForm(instance=obj.dc.content)

    response = render_to_response('file/edit.html',
                                  {'form': form, 'obj': obj},
                                  request=request)
    # if a non-standard status code is set, set it in the response before returning
    if status_code is not None:
        response.status_code = status_code
    return response
def setUp(self):
    """Create and save the test object that the view tests run against."""
    # load test object to test views with
    repo = Repository()
    self.obj = repo.get_object(type=SimpleDigitalObject)
    self.obj.dc.content.title = 'test object for generic views'
    self.obj.text.content = 'sample plain-text content'
    img_file = path.join(settings.FEDORA_FIXTURES_DIR, 'test.png')

    # PNG data is binary, so open in 'rb'; keep the file open until the
    # object has been saved, then make sure the handle is released.
    with open(img_file, 'rb') as image:
        self.obj.image.content = image
        # force datastream checksums so we can test response headers
        for ds in [self.obj.dc, self.obj.rels_ext, self.obj.text,
                   self.obj.image]:
            ds.checksum_type = 'MD5'
        self.obj.save()
def view_postcard_large(request, pid):
    '''Display the large version of a postcard image, with its title only.

    Raises Http404 if the repository request for the object fails.
    '''
    repository = Repository()
    try:
        card = repository.get_object(pid, type=ImageObject)
        # touch the label up front so a failed repository request is
        # turned into a 404 here, before the template is rendered
        card.label
        template_vars = {'card': card}
        return render_to_response('postcards/view_postcard_large.html',
                                  template_vars,
                                  context_instance=RequestContext(request))
    except RequestFailed:
        raise Http404
def search(request):
    """Rough fedora-based postcard search (borrowed heavily from digital
    masters).

    Searches postcards (restricted by ``settings.RELATION``) on the
    title and/or description terms from the search form and displays the
    results 5 per page.  A non-numeric ``page`` parameter falls back to
    the first page; an out-of-range page falls back to the last page of
    results.  If anything goes wrong while searching, the page is
    rendered with an error message and a 500 status code.
    """
    form = SearchForm(request.GET)
    response_code = None
    context = {'search': form}
    number_of_results = 5

    if form.is_valid():
        # adding wildcards because fedora has a weird notion of what 'contains' means
        # TODO: terms search can't be used with field search
        # -- how to allow a keyword search but restrict to postcards?

        # TEMPORARY: restrict to postcards by relation
        search_opts = {'relation': settings.RELATION}
        if 'title' in form.cleaned_data:
            search_opts['title__contains'] = \
                '%s*' % form.cleaned_data['title'].rstrip('*')
        if 'description' in form.cleaned_data:
            search_opts['description__contains'] = \
                '%s*' % form.cleaned_data['description'].rstrip('*')

        try:
            repo = Repository()
            found = repo.find_objects(type=ImageObject, **search_opts)
            search_paginator = Paginator(list(found), number_of_results)

            try:
                page = int(request.GET.get('page', '1'))
            except ValueError:
                page = 1

            # If page request (9999) is out of range, deliver last page of results.
            try:
                search_page = search_paginator.page(page)
            except (EmptyPage, InvalidPage):
                # use this view's own paginator for the fallback; the bare
                # name `paginator` is not defined in this function
                search_page = search_paginator.page(search_paginator.num_pages)

            context['postcards_paginated'] = search_page
            context['title'] = form.cleaned_data['title']
            context['description'] = form.cleaned_data['description']
        except Exception as e:
            # deliberately broad: any failure while searching is reported
            # to the user as a repository error with a 500 status
            logging.debug(e)
            response_code = 500
            context['server_error'] = (
                'There was an error '
                'contacting the digital repository. This '
                'prevented us from completing your search. If '
                'this problem persists, please alert the '
                'repository administrator.')

    response = render_to_response('postcards/search.html', context,
                                  context_instance=RequestContext(request))
    if response_code is not None:
        response.status_code = response_code
    return response
def view_collection(request, pid):
    '''Render the view page for the
    :class:`~genrepo.collection.models.CollectionObject` identified by pid.

    Raises Http404 when the object does not exist (or the current user
    doesn't have permission to see that it exists).
    '''
    collection = Repository(request=request).get_object(
        pid, type=CollectionObject)
    if collection.exists:
        return render_to_response('collection/view.html',
                                  {'obj': collection}, request=request)
    # nonexistent, or not visible to this user
    raise Http404
def remove_test_objects():
    """Purge every object in the configured pidspace from the repository.

    Meant to clear out leftover test objects before or after a test run.
    NOTE: only call this while FEDORA_PIDSPACE is switched to a test
    pidspace.
    """
    # connect with the test fedora credentials when they are configured
    test_user = getattr(settings, 'FEDORA_TEST_USER', None)
    test_password = getattr(settings, 'FEDORA_TEST_PASSWORD', None)
    repo = Repository(username=test_user, password=test_password)

    pid_pattern = '%s:*' % settings.FEDORA_PIDSPACE
    purged = 0
    for leftover in repo.find_objects(pid__contains=pid_pattern):
        # pid/label in the log may help track down which test left the
        # object behind when cleanup is not happening as expected
        logger.info('Purging test object %s - %s' % (leftover.pid, leftover.label))
        repo.purge_object(leftover.pid, "removing test object")
        purged += 1

    if purged:
        print("Removed %s test object(s) with pidspace %s" % (purged, settings.FEDORA_PIDSPACE))
def summary(request):
    '''Postcard summary/about page: information about the postcards plus
    the various entry points for accessing them.'''
    # Every postcard is fetched from fedora so the template has both the
    # total count and the full list to work with.
    # NOTE: this may be inefficient when all postcards are loaded; consider caching
    repository = Repository()
    all_cards = list(repository.find_objects(relation=settings.RELATION))

    template_vars = {
        'categories': PostcardCollection.get().interp.content.interp_groups,
        'count': len(all_cards),
        'postcards': all_cards,
    }
    return render_to_response('postcards/index.html', template_vars,
                              context_instance=RequestContext(request))
def postcard_image(request, pid, size):
    '''Serve out postcard image in requested size.

    Raises Http404 if ``size`` is not a recognized size or if the
    repository request fails.

    :param pid: postcard object pid
    :param size: size to return, one of thumbnail, medium, or large
    '''
    # name of the ImageObject method that produces each supported size
    size_methods = {
        'thumbnail': 'thumbnail',
        'medium': 'medium_image',
        'large': 'large_image',
    }
    if size not in size_methods:
        # an unknown size would otherwise leave no image to return
        raise Http404

    try:
        repo = Repository()
        obj = repo.get_object(pid, type=ImageObject)
        image = getattr(obj, size_methods[size])()
        return HttpResponse(image, mimetype='image/jpeg')
    except RequestFailed:
        raise Http404
def download_file(request, pid):
    '''Serve the master file datastream of a
    :class:`~genrepo.file.models.FileObject` as a download (attachment).'''
    repository = Repository(request=request)
    # FIXME: what should the default download filename be?
    disposition = "attachment; filename=%s" % (pid)
    # delegate to the generic raw datastream view from eulcore
    return raw_datastream(
        request,
        pid,
        FileObject.master.id,
        type=FileObject,
        repo=repository,
        headers={'Content-Disposition': disposition},
    )
def view_postcard(request, pid):
    '''Display a single postcard at actual postcard size, along with its
    description.

    Raises Http404 if the repository request for the object fails.
    '''
    repository = Repository()
    try:
        card = repository.get_object(pid, type=ImageObject)
        # touch the label up front so a failed repository request is
        # turned into a 404 here, before the template is rendered
        card.label

        # ark: first dc:identifier that contains 'ark', if any
        ark_ids = [ident for ident in card.dc.content.identifier_list
                   if 'ark' in ident]
        ark = ark_ids[0] if ark_ids else ''

        # description: first dc:description carrying the description
        # label, with that label trimmed off the front
        labelled = [desc for desc in card.dc.content.description_list
                    if desc.startswith(settings.POSTCARD_DESCRIPTION_LABEL)]
        if labelled:
            description = labelled[0][len(settings.POSTCARD_DESCRIPTION_LABEL):]
        else:
            description = ''

        # postcard text: first dc:description carrying the floating text
        # label, with that label trimmed off the front
        floating = [desc for desc in card.dc.content.description_list
                    if desc.startswith(settings.POSTCARD_FLOATINGTEXT_LABEL)]
        if floating:
            postcard_text = floating[0][len(settings.POSTCARD_FLOATINGTEXT_LABEL):]
        else:
            postcard_text = ''

        template_vars = {
            'card': card,
            'ark': ark,
            'description': description,
            'postcard_text': postcard_text,
        }
        return render_to_response('postcards/view_postcard.html',
                                  template_vars,
                                  context_instance=RequestContext(request))
    except RequestFailed:
        raise Http404
def handle(self, cards_fname, image_dir, dry_run=False, **options):
    """Load postcards described in a TEI file into the repository.

    For every figure in the TEI document, looks for a master image named
    ``<entity>.tif`` (or ``wwi_<entity>.tif``) in ``image_dir``, builds an
    ImageObject with Dublin Core and RELS-EXT metadata derived from the
    figure, and saves it unless ``dry_run`` is set.  Prints a summary of
    what was found and ingested.

    :param cards_fname: path to the TEI file describing the postcards
    :param image_dir: directory containing the master image files
    :param dry_run: when true, build the objects but do not save them
    :param options: command options; ``verbosity`` is required,
        ``username`` and ``password`` are optional repository credentials
    :raises Exception: if the postcard collection object does not exist
        in the repository
    """
    # NOTE(review): block nesting below was reconstructed from flattened
    # source; spots where the nesting was ambiguous are marked.
    verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
    v_normal = 1

    # populate repository username and password, if supplied
    repo_args = {}
    if options.get('username') is not None:
        repo_args['username'] = options.get('username')
    if options.get('password') is not None:
        repo_args['password'] = options.get('password')

    repo = Repository(**repo_args)

    # the collection object must already be in the repository
    collection = PostcardCollection.get()
    if not collection.exists:
        raise Exception(collection.pid + " is not in the repository. Do you need to syncrepo?")

    # Return the value for the first key of my_dict that is contained in a.
    # NOTE(review): `ana in a` is a substring test when a is a string, so
    # a key such as "t-post" also matches the id "t-post2"; which value is
    # returned then appears to depend on dict iteration order -- confirm.
    def anas_simple(my_dict,a):
        for ana in my_dict:
            if ana in a:
                return my_dict[ana]

    # dictionary of lc subjects, simple (using 1 ana id) and complex (using 2 ana ids).
    # NOTE(review): the "nat-jp" value starts with a space -- looks like a typo; confirm.
    ana_lcs = {"nat-it":"World War, 1914-1918--Italy",
               "nat-fr":"World War, 1914-1918--France",
               "nat-us":"World War, 1914-1918--United States",
               "nat-de":"World War, 1914-1918--Germany",
               "nat-brit":"World War, 1914-1918--Great Britain",
               "nat-bel":"World War, 1914-1918--Belgium",
               "nat-au":"World War, 1914-1918--Austria",
               "nat-nl":"World War, 1914-1918--Netherlands",
               "nat-rus":"World War, 1914-1918--Russia",
               "nat-jp":" World War, 1914-1918--Japan",
               "nat-ee":"World War, 1914-1918--Eastern Europe",
               "nat-ca":"World War, 1914-1918--Canada",
               "nat-hu":"World War, 1914-1918--Hungary",
               "mil-nur":"Military Nursing",
               "con-h":"World War, 1914-1918--Humor",
               "con-v":"World War, 1914-1918--Poetry",
               "con-p":"World War, 1914-1918--Persons",
               "con-m":"World War, 1914-1918--Memorials",
               "con-r":"World War, 1914-1918--Destruction and pillage",
               "con-f":"Flags in art",
               "con-el":"Uncle Elmer",
               "hf-p":"World War, 1914-1918--Propaganda",
               "hf-c":"World War, 1914-1918--Children",
               "hf-w":"World War, 1914-1918--Women",
               "hf-re":"World War, 1914-1918--Religious aspects",
               "hf-ro":"World War, 1914-1918--Man-Woman relationships",
               }

    # army headings: used when a card has both the nation id and "mil-a"
    ana_lcc_army = {"nat-fr":u"France. Arm\xe9e",
                    "nat-brit":"Great Britain. Army",
                    "nat-bel":u"Belgium. Arm\xe9e",
                    "nat-de":"Germany. Heer",
                    "nat-us":"United States. Army",
                    "nat-ca":"Canada. Canadian Army",
                    "nat-jp":"Japan. Rikugun",
                    "nat-au":u"Austria. Arm\xe9e",
                    }

    # navy headings: used when a card has both the nation id and "mil-na"
    ana_lcc_navy = {"nat-brit":"Royal Navy. Great Britain",
                    "nat-us":"United States. Navy",
                    "nat-fr":"France. Marine",
                    "nat-de":"Germany. Kriegsmarine",
                    "nat-ca":"Canada. Royal Canadian Navy",
                    }

    # images use dc:type
    ana_lcimage = {"im-ph":"photograph",
                   "im-pa":"painting",
                   "im-dr":"drawing",
                   "im-ca":"cartoon",
                   "im-en":"engraving",
                   "im-po":"poster",
                   "im-s":"silk postcard",
                   }

    # use dc:coverage
    ana_lccoverage = {"t-wwi":"1914-1918",
                      "t-pre":"Before 1914",
                      "t-post":"After 1918",
                      "t-ww2":"1939-1945",
                      "t-post2":"After 1945",
                      }

    # make a dictionary of subjects so type and value is easily accessible by id
    interps = collection.interp.content.interp_groups
    subjects = {}
    for group in interps:
        for interp in group.interp:
            subjects[interp.id] = (group.type, interp.value)

    cards_tei = load_xmlobject_from_file(cards_fname, xmlclass=Tei)
    cards = cards_tei.body.all_figures

    files = 0       # number of cards for which a master file was found
    ingested = 0    # number of cards saved to the repository
    for c in cards:
        # look for <entity>.tif first, then fall back to wwi_<entity>.tif
        file = os.path.join(image_dir, '%s.tif' % c.entity)
        if os.access(file, os.F_OK):
            if verbosity >= v_normal:
                print "Found master file %s for %s" % (file, c.entity)
        else:
            file = os.path.join(image_dir, 'wwi_%s.tif' % c.entity)
            if os.access(file, os.F_OK):
                if verbosity >= v_normal:
                    print "Found master file %s for %s" % (file, c.entity)
            else:
                if verbosity >= v_normal:
                    print "File not found for %s" % c.entity
                # no master file: skip this card
                # NOTE(review): nesting ambiguous -- assumed to apply
                # regardless of verbosity; confirm.
                continue

        files += 1

        obj = repo.get_object(type=ImageObject)
        obj.dc.content.identifier_list.append(c.entity) # Store local identifiers in DC
        obj.label = c.head
        obj.owner = settings.FEDORA_OBJECT_OWNERID
        obj.dc.content.title = obj.label
        # append label so postcard description can be identified in the description elements
        obj.dc.content.description_list.append('%s%s' % (settings.POSTCARD_DESCRIPTION_LABEL, c.description))

        # Add floating text from postcards (text written on the card)
        float_lines = [] # list of lines of text from the postcard
        f_text = c.floatingText
        if len(f_text) > 0:
            # only the first floating text element is used
            f_text = f_text[0]
            if f_text.head:
                float_lines.append(f_text.head)
            if len(f_text.line_group) > 0:
                for group in f_text.line_group:
                    if group.head is not None: #treat head as normal line
                        float_lines.append(group.head)
                    for line in group.line: #add the rest of the lines
                        float_lines.append(line)
                    float_lines.append('\n') #each linegroup needs an extra \n at the end to make a paragraph
            elif len(f_text.line) > 0:
                for line in f_text.line:
                    float_lines.append(line)
            # NOTE(review): nesting ambiguous -- the next three statements
            # are assumed to run only when floating text is present; confirm.
            float_lines = map(unicode, float_lines) #convert all lines to unicode
            float_lines = str.join("\n", float_lines) #Add \n for each line break and join into a single string
            # append label so floating text (postcard text) can be identified in the description elements
            obj.dc.content.description_list.append('%s%s' % (settings.POSTCARD_FLOATINGTEXT_LABEL, float_lines))

        # convert interp text into dc: subjects
        local_subjects = []
        for ana_id in c.ana.split():
            if ana_id in subjects:
                # formatted as "<group type>: <interp value>"
                local_subjects.append('%s: %s' % subjects[ana_id])
            else:
                print 'ana id %s not recognized for %s' % (ana_id, c.entity)
        obj.dc.content.subject_list.extend(local_subjects)

        # map ana ids to LC subject headings
        lc_subjects = []
        ana_ids = []
        ana_ids = c.ana.split()
        if verbosity > v_normal:
            print 'DEBUG: %s are the ana ids for %s' % (ana_ids, c.entity)
        for ana_id in ana_ids:
            # army heading for this nation, once per "mil-a" id on the card
            if ana_id in ana_lcc_army:
                for ana_id2 in ana_ids:
                    if ana_id2 == "mil-a":
                        ana_lc = ana_lcc_army[ana_id]
                        lc_subjects.append('%s' % ana_lc)
                        print '%s added to LC subjects list-army or navy' % ana_lc
            # navy heading for this nation, once per "mil-na" id on the card
            if ana_id in ana_lcc_navy:
                for ana_id2 in ana_ids:
                    if ana_id2 == "mil-na":
                        ana_lc = ana_lcc_navy[ana_id]
                        lc_subjects.append('%s' % ana_lc)
                        print '%s added to LC subjects list-army or navy' % ana_lc
            # simple one-to-one headings
            if ana_id in ana_lcs:
                ana_lc = anas_simple(ana_lcs, ana_id)
                lc_subjects.append('%s' % ana_lc)
                print '%s added to LC subjects list-nat, mil-nur, con, hf' % (ana_lc)
        obj.dc.content.subject_list.extend(lc_subjects)

        # image types, stored in dc:type
        # NOTE(review): lc_subjects is not reset here, so type_list also
        # receives every LC subject collected above -- confirm intended.
        for ana_id in ana_ids:
            my_dict = ana_lcimage
            if ana_id in my_dict:
                ana_image = anas_simple(my_dict, ana_id)
                lc_subjects.append('%s' % ana_image)
                print '%s added to LC subjects list-image type' % (ana_image)
        obj.dc.content.type_list.extend(lc_subjects)

        # time periods, stored in dc:coverage
        # NOTE(review): lc_subjects still holds the subjects and image
        # types, so coverage_list receives those too -- confirm intended.
        for ana_id in ana_ids:
            my_dict = ana_lccoverage
            if ana_id in my_dict:
                ana_cover = anas_simple(my_dict,ana_id)
                lc_subjects.append('%s' % ana_cover)
                print '%s added to LC subjects list-coverage' % ana_cover
        obj.dc.content.coverage_list.extend(lc_subjects)

        # common DC for all postcards
        # NOTE(review): type is assigned twice in a row; presumably the
        # second assignment replaces the first -- confirm.
        obj.dc.content.type = 'image'
        obj.dc.content.type = 'postcard'
        obj.dc.content.relation_list.extend([settings.RELATION,
                                             'http://beck.library.emory.edu/greatwar/'])
        # set file as content of image datastream
        # NOTE(review): this file handle is never explicitly closed.
        obj.image.content = open(file)
        # add relation to postcard collection
        obj.rels_ext.content.add(( URIRef(obj.uri), URIRef(MEMBER_OF_COLLECTION), URIRef(PostcardCollection.get().uri) ))
        # TODO: OAI identifier ?

        if verbosity > v_normal:
            print "Dublin Core\t\n", obj.dc.content.serialize(pretty=True)
            print "RELS-EXT \t\n", obj.rels_ext.content.serialize(pretty=True)

        if not dry_run:
            obj.save()
            # NOTE(review): nesting ambiguous -- the message and counter are
            # assumed to apply only when the object was actually saved; confirm.
            print "ingested %s as %s" % (unicode(c.head).encode('latin-1'), obj.pid)
            ingested += 1

    # summarize what was done
    print "Found %d postcards " % len(cards)
    print "Found %d postcard files " % files
    print "Ingested %d postcards " % ingested
def _create_or_edit_collection(request, pid=None):
    """View to create a new
    :class:`~genrepo.collection.models.CollectionObject` or update an
    existing one.

    On POST with valid form data, creates a new collection (if pid is
    None) or updates an existing collection, then redirects to the site
    index.  On any other request method (GET, HEAD, ...), displays the
    form populated with existing object data (if any).  If saving fails,
    the form is re-rendered with an error message and the repository's
    error code (when available) is used as the HTTP status of the
    response.
    """
    status_code = None
    repo = Repository(request=request)
    # get the object (if pid is not None), or create a new instance
    obj = repo.get_object(pid, type=CollectionObject)

    if request.method == 'POST':
        # update DC from form data (if valid), and save
        form = CollectionDCEditForm(request.POST, instance=obj.dc.content)
        if form.is_valid():
            form.update_instance()
            # also use dc:title as object label
            obj.label = obj.dc.content.title
            try:
                if obj.exists:
                    action = 'updated'
                    save_msg = 'updated via genrepo'
                else:
                    action = 'created new'
                    save_msg = 'ingested via genrepo'

                # save message must be specified in order for Fedora
                # to generate & store an ingest audit trail event
                obj.save(save_msg)
                messages.success(
                    request,
                    'Successfully %s collection <a href="%s"><b>%s</b></a>' %
                    (action, reverse('collection:edit', args=[obj.pid]), obj.pid))

                # maybe redirect to collection view page when we have one
                # - and maybe return a 201 Created status code
                return HttpResponseSeeOtherRedirect(reverse('site-index'))
            except (DigitalObjectSaveFailure, RequestFailed) as rf:
                # do we need a different error message for DigitalObjectSaveFailure?
                if isinstance(rf, PermissionDenied):
                    msg = 'You don\'t have permission to create a collection in the repository.'
                else:
                    msg = 'There was an error communicating with the repository.'
                messages.error(request,
                               msg + ' Please contact a site administrator.')
                # pass the fedora error code back in the http response
                status_code = getattr(rf, 'code', None)
        # if form is not valid, fall through and re-render the form with errors
    else:
        # Every non-POST method gets the unbound form, so `form` is
        # always defined by the time the template is rendered.
        form = CollectionDCEditForm(instance=obj.dc.content)

    response = render_to_response('collection/edit.html',
                                  {'form': form, 'obj': obj},
                                  request=request)
    # if a non-standard status code is set, set it in the response before returning
    if status_code is not None:
        response.status_code = status_code
    return response
def raw_datastream(request, pid, dsid, type=None, repo=None, headers=None):
    '''View to display a raw datastream that belongs to a Fedora Object.
    Returns an :class:`~django.http.HttpResponse` with the response content
    populated with the content of the datastream.  The following HTTP headers
    may be included in all the responses:

    - Content-Type: mimetype of the datastream in Fedora
    - ETag: datastream checksum, as long as the checksum type is not 'DISABLED'

    The following HTTP headers may be included `only` for non-xml and non-RDF
    datastreams (excluded there since they may be inaccurate depending on the
    serialization of the content):

    - Content-MD5: MD5 checksum of the datastream in Fedora, if available
    - Content-Length: size of the datastream in Fedora

    If either the datastream or object are not found, raises an
    :class:`~django.http.Http404` .  For any other errors (e.g., permission
    denied by Fedora), the exception is re-raised and should be handled elsewhere.

    :param request: HttpRequest
    :param pid: Fedora object PID
    :param dsid: datastream ID to be returned
    :param type: custom object type (should extend
        :class:`~eulcore.fedora.models.DigitalObject`) (optional)
    :param repo: :class:`~eulcore.django.fedora.server.Repository` instance to use,
        in case your application requires custom repository initialization (optional)
    :param headers: dictionary of additional headers to include in the response
        (optional)
    '''
    if repo is None:
        repo = Repository()

    get_obj_opts = {}
    if type is not None:
        get_obj_opts['type'] = type
    obj = repo.get_object(pid, **get_obj_opts)

    try:
        # NOTE: we could test that pid is actually the requested
        # obj.has_requisite_content_models but that would mean
        # an extra API call for every datastream but RELS-EXT
        # Leaving out for now, for efficiency
        ds = obj.getDatastreamObject(dsid)

        if ds and ds.exists:
            # if the datastream content has a serialize option, use that
            if hasattr(ds.content, 'serialize'):
                content = ds.content.serialize(pretty=True)
            # otherwise, use content directly
            else:
                content = ds.content
            # NOTE: this will probably need some work to be able to handle large datastreams
            response = HttpResponse(content, mimetype=ds.mimetype)
            # if we have a checksum, use it as an ETag
            if ds.checksum_type != 'DISABLED':
                response['ETag'] = ds.checksum
            # TODO: set last-modified header also ? may require an extra API call

            # Where available & appropriate, pass along content length & MD5
            # checksum in response headers.
            # MD5 and size may not be accurate for XML & RDF depending on
            # serialization, so leaving off in those cases.
            if ds.mimetype not in ['text/xml', 'application/rdf+xml']:
                if ds.checksum_type == 'MD5':
                    response['Content-MD5'] = ds.checksum
                if ds.info.size:
                    response['Content-Length'] = ds.info.size

            # set any user-specified headers that were passed in
            if headers:
                for header, val in headers.items():
                    response[header] = val

            return response
        else:
            raise Http404

    except RequestFailed as rf:
        # if object is not the specified type or if either the object
        # or the requested datastream doesn't exist, 404
        if rf.code == 404 or \
                (type is not None and not obj.has_requisite_content_models) or \
                not getattr(obj, dsid).exists or not obj.exists:
            raise Http404

        # for anything else, re-raise & let Django's default 500 logic handle it
        raise