def browse(request):
    """Browse postcards and display thumbnail images.

    Two optional GET parameters are honored:

    * ``subject`` -- added to the repository search options and echoed
      back to the template context.
    * ``page`` -- page number to display; a non-integer value falls back
      to page 1, and an empty or invalid page falls back to the last page.

    Results are paginated 15 per page and rendered with
    ``postcards/browse.html``.
    """
    per_page = 15
    context = {}

    repo = Repository()
    repo.default_object_type = ImageObject

    # copy so that adding the subject filter never touches the dict
    # handed back by postcard_search_opts()
    search_opts = postcard_search_opts().copy()
    if 'subject' in request.GET:
        subject = request.GET['subject']
        context['subject'] = subject
        search_opts['subject'] = subject

    # the search results are fully materialized before pagination
    paginator = Paginator(list(repo.find_objects(**search_opts)), per_page)

    try:
        page_number = int(request.GET.get('page', '1'))
    except ValueError:
        page_number = 1

    # If page request (9999) is out of range, deliver last page of results.
    try:
        current_page = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        current_page = paginator.page(paginator.num_pages)

    context['postcards_paginated'] = current_page
    return render(request, 'postcards/browse.html', context)
class FedoraTestCase(unittest.TestCase):
    """Base class for tests that run against a Fedora repository.

    Subclasses may define a ``fixtures`` list of fixture file names; each one
    is ingested in :meth:`setUp` and purged again in :meth:`tearDown`.
    """

    def __init__(self, *args, **kwargs):
        """Connect to the test repository and purge stale test objects."""
        unittest.TestCase.__init__(self, *args, **kwargs)
        self.fedora_fixtures_ingested = []
        self.pidspace = FEDORA_PIDSPACE
        self.repo = Repository(FEDORA_ROOT, FEDORA_USER, FEDORA_PASSWORD)

        # fixture cleanup happens in tearDown, which doesn't always run
        # if a test fails - clean up stale test objects from a previous run here
        stale_objects = list(self.repo.find_objects(pid__contains="%s:*" % self.pidspace))
        if stale_objects:
            # reported through the logger, like every other message in this class
            logger.info("Removing %d stale test object(s) in pidspace %s"
                        % (len(stale_objects), self.pidspace))
        for obj in stale_objects:
            try:
                self.repo.purge_object(obj.pid)
            except RequestFailed as rf:
                logger.warning("Error purging stale test object %s (TestCase init): %s"
                               % (obj.pid, rf))

    def setUp(self):
        """Prepare API handles and ingest any fixtures the subclass declares."""
        # NOTE: queries require RI flush=True or test objects will not show up in RI
        self.repo.risearch.RISEARCH_FLUSH_ON_QUERY = True
        self.opener = self.repo.opener
        self.api = ApiFacade(self.opener)
        for fixture in getattr(self, "fixtures", []):
            self.ingestFixture(fixture)

    def tearDown(self):
        """Purge every object registered during the test (best effort)."""
        for pid in self.fedora_fixtures_ingested:
            try:
                self.repo.purge_object(pid)
            except RequestFailed as rf:
                # a failed purge must not mask the actual test outcome
                logger.warning("Error purging test object %s in tear down: %s" % (pid, rf))

    def getNextPid(self):
        """Return a new pid from the repository, in the test pidspace if set."""
        pidspace = getattr(self, "pidspace", None)
        return self.repo.get_next_pid(namespace=pidspace)

    def loadFixtureData(self, fname):
        """Load fixture ``fname``; when a pidspace is set, give it a fresh pid."""
        data = load_fixture_data(fname)
        # if pidspace is specified, get a new pid from fedora and set it as the pid in the xml
        if hasattr(self, "pidspace"):
            xml = xmlmap.load_xmlobject_from_string(data, _MinimalFoxml)
            xml.pid = self.getNextPid()
            return xml.serialize()
        return data

    def ingestFixture(self, fname):
        """Ingest fixture ``fname`` and register its pid for cleanup."""
        foxml = self.loadFixtureData(fname)
        pid = self.repo.ingest(foxml)
        if pid:
            # we'd like this always to be true. if ingest fails we should
            # throw an exception. that probably hasn't been thoroughly
            # tested yet, though, so we'll check it until it has been.
            self.append_pid(pid)

    def append_pid(self, pid):
        """Register ``pid`` so that tearDown purges it."""
        self.fedora_fixtures_ingested.append(pid)

    # Backward-compatible alias for the original method name.  nosetests
    # autodetects "append_test_pid" and tries to run it as a unit test, so the
    # function is explicitly flagged as a non-test.
    append_pid.__test__ = False
    append_test_pid = append_pid
def remove_test_objects(self):
    """Purge leftover test objects, before or after a test run.

    Every object whose pid matches ``<settings.FEDORA_PIDSPACE>:*`` is purged.
    A purge that fails with ``RequestFailed`` is logged and skipped.  When at
    least one object was removed, a summary line is written to stderr.

    NOTE: This method expects to be called only when FEDORA_PIDSPACE has been
    switched to a test pidspace.
    """
    # use test fedora credentials if they are set
    credentials = {
        'root': getattr(settings, 'FEDORA_TEST_ROOT', None),
        'username': getattr(settings, 'FEDORA_TEST_USER', None),
        'password': getattr(settings, 'FEDORA_TEST_PASSWORD', None),
    }
    repo = Repository(**credentials)
    pid_pattern = '%s:*' % settings.FEDORA_PIDSPACE

    purged = 0
    for leftover in repo.find_objects(pid__contains=pid_pattern):
        # if objects are unexpectedly not being cleaned up, the pid may help
        # to isolate which test is creating the leftover objects
        try:
            repo.purge_object(leftover.pid, "removing test object")
        except RequestFailed:
            logger.warn('Error purging test object %s' % leftover.pid)
            continue
        # NOTE: not displaying label because we may not have permission to access it
        logger.info('Purged test object %s' % leftover.pid)
        purged += 1

    if purged:
        print >> sys.stderr, "Removed %s test object(s) with pidspace %s" \
            % (purged, settings.FEDORA_PIDSPACE)
def purge_item(item_id):
    """Purge the repository objects found for ``pitt:<item_id>``.

    Searches with ``pid__contains`` on the pid built from ``item_id``, purges
    each object returned, and prints one confirmation line per purged pid.
    """
    repo = Repository()
    target_pid = 'pitt:%s' % (item_id,)
    for match in repo.find_objects(pid__contains=target_pid):
        repo.purge_object(match.pid)
        print('%s purged' % (match.pid,))
def search(request):
    """Rough fedora-based postcard search (borrowed heavily from digital masters).

    Binds a ``SearchForm`` to the GET parameters.  When the form is valid, the
    repository is searched by title and/or description (results restricted to
    ``settings.RELATION``) and paginated 5 per page; the ``page`` GET
    parameter picks the page, falling back to page 1 for a non-integer value
    and to the last page for an empty or invalid page.

    If anything fails while searching, the error is logged with its traceback,
    an explanatory ``server_error`` message is put in the context and the
    response is returned with status 500.  The template is always
    ``postcards/search.html``.
    """
    form = SearchForm(request.GET)
    response_code = None
    context = {'search': form}
    number_of_results = 5

    if form.is_valid():
        # adding wildcards because fedora has a weird notion of what 'contains' means
        # TODO: terms search can't be used with field search
        # -- how to allow a keyword search but restrict to postcards?
        # TEMPORARY: restrict to postcards by relation
        search_opts = {'relation': settings.RELATION}
        if 'title' in form.cleaned_data:
            search_opts['title__contains'] = \
                '%s*' % form.cleaned_data['title'].rstrip('*')
        if 'description' in form.cleaned_data:
            search_opts['description__contains'] = \
                '%s*' % form.cleaned_data['description'].rstrip('*')

        try:
            repo = Repository()
            found = repo.find_objects(type=ImageObject, **search_opts)
            search_paginator = Paginator(list(found), number_of_results)

            try:
                page = int(request.GET.get('page', '1'))
            except ValueError:
                page = 1

            # If page request (9999) is out of range, deliver last page of results.
            try:
                search_page = search_paginator.page(page)
            except (EmptyPage, InvalidPage):
                search_page = search_paginator.page(search_paginator.num_pages)

            context['postcards_paginated'] = search_page
            context['title'] = form.cleaned_data['title']
            context['description'] = form.cleaned_data['description']
        except Exception as e:
            # Deliberately broad: any failure here is reported to the user as
            # a repository problem.  Log at error level with the traceback so
            # a 500 response is never silent in production logs.
            logging.exception(e)
            response_code = 500
            context['server_error'] = (
                'There was an error '
                'contacting the digital repository. This '
                'prevented us from completing your search. If '
                'this problem persists, please alert the '
                'repository administrator.'
            )

    response = render(request, 'postcards/search.html', context)
    if response_code is not None:
        response.status_code = response_code
    return response
def summary(request):
    '''Postcard summary/about page.

    Renders ``postcards/index.html`` with the full list of postcards found in
    fedora, their total count, and the interp groups ("categories") read from
    the postcard collection object.
    '''
    # get a list of all the postcards by searching in fedora
    # NOTE: this may be inefficient when all postcards are loaded; consider caching
    repo = Repository()
    search_opts = postcard_search_opts()
    all_postcards = list(repo.find_objects(**search_opts))

    context = {'count': len(all_postcards), 'postcards': all_postcards}

    # get categories from fedora collection object
    collection = PostcardCollection.get()
    context['categories'] = collection.interp.content.interp_groups

    return render(request, 'postcards/index.html', context)
def remove_test_objects(self):
    """Purge leftover test objects, before or after a test run.

    Every object whose pid matches ``<settings.FEDORA_PIDSPACE>:*`` is purged.
    A purge that fails with ``RequestFailed`` is logged and skipped.  When at
    least one object was removed, a summary line is printed.

    NOTE: This method expects to be called only when FEDORA_PIDSPACE has been
    switched to a test pidspace.
    """
    test_pidspace = settings.FEDORA_PIDSPACE

    # use test fedora credentials if they are set
    repo = Repository(
        root=getattr(settings, 'FEDORA_TEST_ROOT', None),
        username=getattr(settings, 'FEDORA_TEST_USER', None),
        password=getattr(settings, 'FEDORA_TEST_PASSWORD', None),
    )

    removed = 0
    for test_obj in repo.find_objects(pid__contains='%s:*' % test_pidspace):
        # if objects are unexpectedly not being cleaned up, the pid may help
        # to isolate which test is creating the leftover objects
        try:
            repo.purge_object(test_obj.pid, "removing test object")
        except RequestFailed:
            logger.warn('Error purging test object %s' % test_obj.pid)
        else:
            # NOTE: not displaying label because we may not have permission to access it
            logger.info('Purged test object %s' % test_obj.pid)
            removed += 1

    if removed:
        print("Removed %s test object(s) with pidspace %s" % (removed, settings.FEDORA_PIDSPACE))
class FedoraStorage(base.BaseStorage):
    """Fedora Commons repository storage.

    Documents are stored as Fedora digital objects with datastreams for the
    image, binary, thumbnail, OCR script and transcript, plus a plain-text
    ``meta`` datastream holding ``key=value`` metadata lines.
    """

    configform = ConfigForm
    defaults = dict(
        root=getattr(settings, "FEDORA_ROOT", ""),
        username=getattr(settings, "FEDORA_USER", ""),
        password=getattr(settings, "FEDORA_PASS", ""),
        namespace=getattr(settings, "FEDORA_PIDSPACE", ""),
        image_name=getattr(settings, "FEDORA_IMAGE_NAME", ""),
        transcript_name=getattr(settings, "FEDORA_TRANSCRIPT_NAME", "")
    )

    def __init__(self, *args, **kwargs):
        """Set up the repository connection and the document object model.

        Required keyword arguments: ``root``, ``username``, ``password``,
        ``namespace``, ``image_name`` and ``transcript_name`` (a missing one
        raises ``KeyError``).
        """
        super(FedoraStorage, self).__init__(*args, **kwargs)
        self.namespace = kwargs["namespace"]
        self.image_name = kwargs["image_name"]
        self.thumbnail_name = "THUMBNAIL"
        self.binary_name = "BINARY"
        self.script_name = "OCR_SCRIPT"
        self.transcript_name = kwargs["transcript_name"]

        self.repo = Repository(
            root=kwargs["root"],
            username=kwargs["username"],
            password=kwargs["password"])

        # The object class is built at runtime because the pidspace and the
        # datastream ids depend on this storage's configuration.
        self.model = type("Document", (DigitalObject,), {
            "default_pidspace": kwargs["namespace"],
            "FILE_CONTENT_MODEL": "info:fedora/genrepo:File-1.0",
            "CONTENT_MODELS": ["info:fedora/genrepo:File-1.0"],
            "image": FileDatastream(self.image_name, "Document image", defaults={
                'versionable': True,
            }),
            "binary": FileDatastream(self.binary_name, "Document image binary", defaults={
                'versionable': True,
            }),
            "thumbnail": FileDatastream(self.thumbnail_name, "Document image thumbnail", defaults={
                'versionable': True,
            }),
            "script": FileDatastream(self.script_name, "OCR Script", defaults={
                "versionable": True,
            }),
            "transcript": FileDatastream(self.transcript_name, "Document transcript", defaults={
                "versionable": True,
            }),
            "meta": FileDatastream("meta", "Document metadata", defaults={
                "versionable": False,
            }),
        })

    def read_metadata(self, doc):
        """Parse the ``meta`` datastream of ``doc`` into a dict.

        Only lines of the form ``key=value`` (word-character key, exactly one
        ``=``, non-empty value) are kept; anything else is ignored.  Returns
        an empty dict when the datastream has no content.
        """
        meta = doc._doc.meta.content
        if hasattr(meta, "read"):
            meta = meta.read()
        if not meta:
            return {}
        pairs = []
        for line in meta.split("\n"):
            line = line.strip()
            if re.match(r"^\w+=[^=]+$", line):
                # the pattern guarantees a single "=", so this is a 2-item list
                pairs.append(line.split("="))
        return dict(pairs)

    def write_metadata(self, doc, **kwargs):
        """Merge ``kwargs`` into the stored metadata of ``doc``.

        The merged mapping is written back to the ``meta`` datastream as one
        ``key=value`` line per entry.  The document is not saved here.
        """
        meta = self.read_metadata(doc)
        meta.update(kwargs)
        metacontent = [u"%s=%s" % (k, v) for k, v in meta.items()]
        doc._doc.meta.content = "\n".join(metacontent)

    def attr_uri(self, doc, attr):
        """URI for the content of the datastream named by ``attr``."""
        return "%sobjects/%s/datastreams/%s/content" % (
            self.repo.fedora_root,
            urllib.quote(doc.pid),
            getattr(self, "%s_name" % attr)
        )

    def document_label(self, doc):
        """Get the document label."""
        return doc._doc.label

    def document_attr_empty(self, doc, attr):
        """Check if document attr is empty (datastream size is zero)."""
        return getattr(doc._doc, attr).info.size == 0

    def document_attr_label(self, doc, attr):
        """Get label for a datastream attribute."""
        return getattr(doc._doc, attr).label

    def document_attr_mimetype(self, doc, attr):
        """Get mimetype for a datastream attribute."""
        return getattr(doc._doc, attr).mimetype

    def document_attr_content_handle(self, doc, attr):
        """Get content for a datastream attribute.

        Returns an empty ``StringIO`` when the datastream has no content, so
        callers always receive a file-like object.
        """
        handle = getattr(doc._doc, attr).content
        return StringIO() if handle is None else handle

    def document_metadata(self, doc):
        """Get document metadata, as parsed from the ``meta`` datastream."""
        return self.read_metadata(doc)

    def _set_document_ds_content(self, doc, dsattr, content):
        """Assign ``content`` to the datastream ``dsattr`` of ``doc``."""
        docattr = getattr(doc._doc, dsattr)
        docattr.content = content

    def set_document_attr_content(self, doc, attr, content):
        """Set datastream content."""
        self._set_document_ds_content(doc, attr, content)

    def set_document_attr_mimetype(self, doc, attr, mimetype):
        """Set datastream mimetype."""
        getattr(doc._doc, attr).mimetype = mimetype

    def set_document_attr_label(self, doc, attr, label):
        """Set datastream label."""
        getattr(doc._doc, attr).label = label

    def set_document_label(self, doc, label):
        """Set document label."""
        doc._doc.label = label

    def set_document_metadata(self, doc, **kwargs):
        """Set arbitrary document metadata."""
        # write_metadata only accepts keyword arguments, so the mapping has
        # to be unpacked; passing it positionally raises TypeError.
        self.write_metadata(doc, **kwargs)

    def save_document(self, doc):
        """Save document."""
        doc._doc.save()

    def create_document(self, label):
        """Get a new, unsaved document object with the given label."""
        dobj = self.repo.get_object(type=self.model)
        dobj.label = label
        dobj.meta.label = "Document Metadata"
        dobj.meta.mimetype = "text/plain"
        return FedoraDocument(dobj, self)

    def get(self, pid):
        """Get an object by id; returns None when no object is obtained."""
        doc = self.repo.get_object(pid, type=self.model)
        if doc:
            return FedoraDocument(doc, self)
        return None

    def delete(self, doc, msg=None):
        """Delete an object, purging it from the repository."""
        self.repo.purge_object(doc.pid, log_message=msg)

    def list(self, namespace=None):
        """List documents in the repository.

        ``namespace`` defaults to the namespace this storage was configured
        with.
        """
        ns = namespace if namespace is not None else self.namespace
        return [FedoraDocument(d, self)
                for d in self.repo.find_objects("%s:*" % ns, type=self.model)]

    def list_pids(self, namespace=None):
        """List of pids.

        This unfortunately involves calling list(), so it is not a quicker
        alternative.
        """
        # forward the namespace; it used to be silently dropped
        return [doc.pid for doc in self.list(namespace)]
def test_badhostname(self):
    """Searching through a repository at an unresolvable host must raise."""
    self.ingestFixture('object-with-pid.foxml')
    fixture_pid = self.fedora_fixtures_ingested[0]

    bogus_repo = Repository('http://bogus.host.name.foo:8080/fedora/')
    results = bogus_repo.find_objects(pid=fixture_pid)

    # TODO: currently just a URLError; make test more specific if we add more specific exceptions
    # the error is expected when the results are consumed by list()
    self.assertRaises(Exception, list, results)
def test_nonssl(self):
    """An ingested fixture can be found through the non-SSL repository root."""
    self.ingestFixture('object-with-pid.foxml')
    fixture_pid = self.fedora_fixtures_ingested[0]

    nonssl_repo = Repository(FEDORA_ROOT_NONSSL, FEDORA_USER, FEDORA_PASSWORD)
    matches = list(nonssl_repo.find_objects(pid=fixture_pid))

    self.assertEqual(1, len(matches))
class FedoraTestCase(unittest.TestCase):
    """Base class for tests that run against a Fedora repository.

    Subclasses may define a ``fixtures`` list of fixture file names; each one
    is ingested in :meth:`setUp` and purged again in :meth:`tearDown`.
    """

    def __init__(self, *args, **kwargs):
        """Connect to the test repository and purge stale test objects."""
        unittest.TestCase.__init__(self, *args, **kwargs)
        self.fedora_fixtures_ingested = []
        self.pidspace = FEDORA_PIDSPACE
        self.repo = Repository(FEDORA_ROOT, FEDORA_USER, FEDORA_PASSWORD)

        # fixture cleanup happens in tearDown, which doesn't always run
        # if a test fails - clean up stale test objects from a previous run here
        pid_pattern = '%s:*' % self.pidspace
        leftovers = list(self.repo.find_objects(pid__contains=pid_pattern))
        if leftovers:
            logger.info('Removing %d stale test object(s) in pidspace %s'
                        % (len(leftovers), self.pidspace))
        for leftover in leftovers:
            try:
                self.repo.purge_object(leftover.pid)
            except RequestFailed as rf:
                logger.warn('Error purging stale test object %s (TestCase init): %s'
                            % (leftover.pid, rf))

    def setUp(self):
        """Prepare API handles and ingest any fixtures the subclass declares."""
        # NOTE: queries require RI flush=True or test objects will not show up in RI
        self.repo.risearch.RISEARCH_FLUSH_ON_QUERY = True
        self.opener = self.repo.opener
        self.api = ApiFacade(self.opener)
        for fixture_name in getattr(self, 'fixtures', []):
            self.ingestFixture(fixture_name)

    def tearDown(self):
        """Purge every object registered during the test (best effort)."""
        for ingested_pid in self.fedora_fixtures_ingested:
            try:
                self.repo.purge_object(ingested_pid)
            except RequestFailed as rf:
                logger.warn('Error purging test object %s in tear down: %s'
                            % (ingested_pid, rf))

    def getNextPid(self):
        """Return a new pid from the repository, in the test pidspace if set."""
        return self.repo.get_next_pid(namespace=getattr(self, 'pidspace', None))

    def loadFixtureData(self, fname):
        """Load fixture ``fname``; when a pidspace is set, give it a fresh pid."""
        raw = load_fixture_data(fname)
        if not hasattr(self, 'pidspace'):
            return raw
        # pidspace is specified: get a new pid from fedora and set it as the
        # pid in the xml
        foxml = xmlmap.load_xmlobject_from_string(raw, _MinimalFoxml)
        foxml.pid = self.getNextPid()
        return foxml.serialize()

    def ingestFixture(self, fname):
        """Ingest fixture ``fname`` and register its pid for cleanup."""
        new_pid = self.repo.ingest(self.loadFixtureData(fname))
        if new_pid:
            # we'd like this always to be true. if ingest fails we should
            # throw an exception. that probably hasn't been thoroughly
            # tested yet, though, so we'll check it until it has been.
            self.append_pid(new_pid)

    # note: renamed from append_test_pid so that nosetests doesn't
    # autodetect and attempt to run as a unit test.
    def append_pid(self, pid):
        """Register ``pid`` so that tearDown purges it."""
        self.fedora_fixtures_ingested.append(pid)
def handle(self, *args, **kwargs):
    """Purge every Video object whose label is ``DELETE``."""
    # read for parity with the command options; not used below
    verbosity = kwargs.get('verbosity', self.v_normal)

    repo = Repository()
    for duplicate in repo.find_objects(type=Video, label="DELETE"):
        repo.purge_object(duplicate.pid)
class FedoraFS(fuse.Fuse):
    """FUSE filesystem backed by a Fedora repository.

    Objects returned by a repository search become the entries of the root
    directory; everything below an object is delegated to that object's
    ``fs_attr`` / ``fs_members`` / ``fs_read`` / ``fs_write`` methods.
    Writes are buffered per path in ``towrite`` and sent to the repository
    on ``fsync``.
    """

    # upper bound on the number of search results loaded as top-level entries
    MAX_MEMBERS = 150

    def __init__(self, *args, **kw):
        """Set default connection options, then initialize the FUSE base."""
        # fedora-specific configuration parameters
        # these are the defaults; can be overridden with command-line options
        self.base_url = 'http://localhost:8080/fedora/'
        self.username = '******'
        self.password = '******'
        self.filter = None
        self.towrite = {}       # path -> MutableString of pending content

        fuse.Fuse.__init__(self, *args, **kw)

        self._members = None    # pid -> object, loaded lazily by `members`
        self.files = {}
        self.lastfiles = {}
        self.objects = {}

    def main(self, args=None):
        """Connect to the repository and start the FUSE main loop."""
        # initialize fedora connection AFTER command line options have been parsed

        # eulcore.fedora.server requires base_url with a trailing slash
        if not self.base_url.endswith('/'):
            self.base_url = '%s/' % self.base_url

        # if there is a username and an *empty* password, prompt user
        if self.username and not self.password:
            self.password = getpass()
        # TODO: support netrc for credentials?
        # TODO: catch invalid credentials!! (don't retry if invalid)
        self.repo = Repository(self.base_url, self.username, self.password)
        self.repo.default_object_type = FsObject
        fuse.Fuse.main(self, args)

    @property
    def members(self):
        """Dict of pid -> object for the top-level directory (loaded once).

        At most ``MAX_MEMBERS`` search results are loaded.  The result is
        always a dict, possibly empty.
        """
        if self._members is None:
            from itertools import islice
            # only search if pid list has not already been populated
            found = self.repo.find_objects(terms=self.filter)
            # islice stops quietly when the search yields fewer results than
            # the limit, where repeated next() calls raised StopIteration
            self._members = dict(
                (obj.pid, obj) for obj in islice(found or (), self.MAX_MEMBERS))
        return self._members

    def getattr(self, path):
        """Return stat info for ``path``, or ``-errno.ENOENT`` if unknown."""
        path_els = path.strip('/').split('/')
        logger.debug('getattr for path %s - path elements are %s' % (path, path_els))

        if path == '/':
            st = FsStat()   # access time defaults to now
            # directory, mode rwxr-xr-x (octal 755)
            st.st_mode = (stat.S_IFDIR | stat.S_IRWXU
                          | stat.S_IRGRP | stat.S_IXGRP
                          | stat.S_IROTH | stat.S_IXOTH)
            # for a directory, number of links should be subdirs + 2
            member_count = len(self.members)
            logger.debug('members are %s, count is %s' % (self.members, member_count))
            st.st_nlink = 2 + member_count
            return st

        if path_els[0] in self.members:
            return self.members[path_els[0]].fs_attr(*path_els[1:])

        # unknown top-level entry: report it with a negative errno, the same
        # error convention write() uses
        return -errno.ENOENT

    def readdir(self, path, offset):
        """Yield ``fuse.Direntry`` items for the directory at ``path``."""
        dir_entries = ['.', '..']
        # FIXME: howto use offset for large directories ?
        path_els = path.strip('/').split('/')
        logger.debug('readdir for path %s, offset %s - path elements are %s' %
                     (path, offset, path_els))

        if path == '/':
            # root: one entry per loaded repository object
            dir_entries.extend([obj.fs_name() for obj in self.members.values()])
        elif path_els[0] in self.members:
            dir_entries.extend(self.members[path_els[0]].fs_members(*path_els[1:]))

        logger.debug('dir entries for %s are %s' % (path, dir_entries))
        for entry in dir_entries:
            yield fuse.Direntry(entry)

    def read(self, path, size, offset):
        """Return up to ``size`` bytes of ``path`` starting at ``offset``.

        Returns an empty string when ``offset`` is at or past the end of the
        content, and ``-errno.ENOENT`` when the top-level entry is unknown.
        """
        logger.debug('read path=%s, size=%s, offset=%s' % (path, size, offset))
        path_els = path.strip('/').split('/')
        if path_els[0] not in self.members:
            return -errno.ENOENT

        # FIXME: not very efficient - the whole content is fetched per read
        content = self.members[path_els[0]].fs_read(*path_els[1:])
        # slicing clamps to the available length by itself
        return content[offset:offset + size]

    def write(self, path, buf, offset):
        """Buffer ``buf`` for ``path`` at ``offset``; flushed by fsync.

        Returns the number of bytes accepted, or ``-errno.ENOSYS`` for paths
        that are not writable datastreams or for a first write that does not
        start at offset 0.
        """
        path_els = path.strip('/').split('/')
        logger.debug('write: offset %s, buf: %s' % (offset, buf))

        # currently only expect to be writing datastreams
        writable = (
            len(path_els) == 2
            and path_els[0] in self.members
            and path_els[1] in self.members[path_els[0]].fs_members(writable_only=True)
        )
        if not writable:
            # attempting to write something we don't handle
            return -errno.ENOSYS

        if path not in self.towrite:
            if offset != 0:
                # don't allow starting to write a file in the middle
                return -errno.ENOSYS
            self.towrite[path] = MutableString()

        # replace exactly the region [offset, offset + len(buf)); an end
        # index of len(buf) duplicates buffered data once offset exceeds it
        self.towrite[path][offset:offset + len(buf)] = buf
        return len(buf)

    def fsync(self, path, isfsyncfile):
        """Send the buffered content for ``path`` to the repository."""
        # FIXME: what does isfsyncfile do ?
        # assuming if there is data to write, file should be written (?)
        if path in self.towrite:
            contents = str(self.towrite[path])
            logger.debug('fsync contents: %s' % contents)
            if contents:
                path_els = path.strip('/').split('/')
                # stuff should only get in towrite if it was determined to be writable
                if self.members[path_els[0]].fs_write(path_els[1], contents):
                    logger.debug('%s updated successfully' % path)
                    # the buffer is dropped only after a successful write
                    del self.towrite[path]
        return 0

    # The operations below are accepted but do nothing.

    def mknod(self, path, mode, dev):
        return 0

    def unlink(self, path):
        return 0

    def release(self, path, flags):
        return 0

    def open(self, path, flags):
        return 0

    def truncate(self, path, size):
        return 0

    def utime(self, path, times):
        return 0

    def mkdir(self, path, mode):
        return 0

    def rmdir(self, path):
        return 0

    def rename(self, pathfrom, pathto):
        return 0

    def readlink(self, path):
        """Resolve a symlink to the top-level entry named by its last element."""
        logger.debug('readlink path=%s' % path)
        # for now, the only symlink in use is /pid/relation/pid
        # FIXME: can we shift this logic to the model somehow?
        els = path.split('/')
        pid = els[-1]       # last element is pid - link to top-level pid entry
        return "../../" + pid

    def fuseoptref(self):
        """Return a ``fuse.FuseArgs`` carrying the current option values."""
        fuse_args = fuse.FuseArgs()
        # set the options on the instance being returned (was `fuse.args`)
        fuse_args.optlist = {
            "url": self.base_url,
            "username": self.username,
            "password": self.password,
            "filter": self.filter,
        }
        return fuse_args