コード例 #1
0
def browse(request):
    """Render a paginated thumbnail listing of postcards.

    Two optional GET parameters are honored:

    * ``subject`` -- added to the repository search options and echoed
      back to the template context.
    * ``page`` -- page number to display; a non-integer value falls back
      to page 1 and an out-of-range value to the last page of results.
    """
    per_page = 15
    repo = Repository()
    repo.default_object_type = ImageObject

    context = {}
    criteria = postcard_search_opts().copy()
    if 'subject' in request.GET:
        for target in (context, criteria):
            target['subject'] = request.GET['subject']

    # the paginator needs a sized sequence, so consume the search results
    paginator = Paginator(list(repo.find_objects(**criteria)), per_page)

    try:
        page_number = int(request.GET.get('page', '1'))
    except ValueError:
        page_number = 1

    # If page request (9999) is out of range, deliver last page of results.
    try:
        current_page = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        current_page = paginator.page(paginator.num_pages)

    context['postcards_paginated'] = current_page
    return render(request, 'postcards/browse.html', context)
コード例 #2
0
ファイル: base.py プロジェクト: kwbock/eulfedora
class FedoraTestCase(unittest.TestCase):
    """Base class for tests that run against a live Fedora repository.

    Subclasses may define a ``fixtures`` list of fixture file names; each
    one is ingested in :meth:`setUp` and purged again in :meth:`tearDown`.
    """

    def __init__(self, *args, **kwargs):
        """Connect to the test repository and purge stale test objects."""
        unittest.TestCase.__init__(self, *args, **kwargs)
        self.fedora_fixtures_ingested = []
        self.pidspace = FEDORA_PIDSPACE

        self.repo = Repository(FEDORA_ROOT, FEDORA_USER, FEDORA_PASSWORD)

        # fixture cleanup happens in tearDown, which doesn't always run
        # if a test fails - clean up stale test objects from a previous run here
        stale_objects = list(self.repo.find_objects(pid__contains="%s:*" % self.pidspace))
        if stale_objects:
            # single-argument call form prints identically on Python 2 and 3
            print("Removing %d stale test object(s) in pidspace %s" % (len(stale_objects), self.pidspace))
            for obj in stale_objects:
                try:
                    self.repo.purge_object(obj.pid)
                except RequestFailed as rf:
                    # best effort: a failed purge must not abort test setup
                    logger.warning("Error purging stale test object %s (TestCase init): %s", obj.pid, rf)

    def setUp(self):
        """Prepare API helpers and ingest any fixtures the subclass declares."""
        # NOTE: queries require RI flush=True or test objects will not show up in RI
        self.repo.risearch.RISEARCH_FLUSH_ON_QUERY = True
        self.opener = self.repo.opener
        self.api = ApiFacade(self.opener)
        for fixture in getattr(self, "fixtures", []):
            self.ingestFixture(fixture)

    def tearDown(self):
        """Purge every object recorded in ``fedora_fixtures_ingested``."""
        for pid in self.fedora_fixtures_ingested:
            try:
                self.repo.purge_object(pid)
            except RequestFailed as rf:
                # keep purging the remaining objects even if one fails
                logger.warning("Error purging test object %s in tear down: %s", pid, rf)

    def getNextPid(self):
        """Ask the repository for a new pid in this test case's pidspace."""
        pidspace = getattr(self, "pidspace", None)
        return self.repo.get_next_pid(namespace=pidspace)

    def loadFixtureData(self, fname):
        """Return the fixture content for ``fname``.

        When a pidspace is set, the fixture's pid is replaced with a
        freshly allocated one and the re-serialized XML is returned.
        """
        data = load_fixture_data(fname)
        # if pidspace is specified, get a new pid from fedora and set it as the pid in the xml
        if hasattr(self, "pidspace"):
            xml = xmlmap.load_xmlobject_from_string(data, _MinimalFoxml)
            xml.pid = self.getNextPid()
            return xml.serialize()
        return data

    def ingestFixture(self, fname):
        """Ingest fixture ``fname`` and remember its pid for cleanup."""
        foxml = self.loadFixtureData(fname)
        pid = self.repo.ingest(foxml)
        if pid:
            # we'd like this always to be true. if ingest fails we should
            # throw an exception. that probably hasn't been thoroughly
            # tested yet, though, so we'll check it until it has been.
            self.append_test_pid(pid)

    def append_test_pid(self, pid):
        """Record ``pid`` so that :meth:`tearDown` purges it."""
        self.fedora_fixtures_ingested.append(pid)
コード例 #3
0
ファイル: testutil.py プロジェクト: bodleian/eulfedora
    def remove_test_objects(self):
        """Purge leftover test objects and report the count on stderr.

        Every object whose pid matches ``<settings.FEDORA_PIDSPACE>:*`` is
        purged; failures are logged and skipped.

        NOTE: This method expects to be called only when FEDORA_PIDSPACE
        has been switched to a test pidspace.
        """
        # use test fedora credentials if they are set
        repo = Repository(root=getattr(settings, 'FEDORA_TEST_ROOT', None),
                          username=getattr(settings, 'FEDORA_TEST_USER', None),
                          password=getattr(settings, 'FEDORA_TEST_PASSWORD',
                                           None))
        test_objects = repo.find_objects(pid__contains='%s:*' %
                                         settings.FEDORA_PIDSPACE)
        count = 0
        for obj in test_objects:
            # if objects are unexpectedly not being cleaned up, pid/label may help
            # to isolate which test is creating the leftover objects
            try:
                repo.purge_object(obj.pid, "removing test object")
            except RequestFailed:
                logger.warning('Error purging test object %s', obj.pid)
            else:
                # NOTE: not displaying label because we may not have permission to access it
                logger.info('Purged test object %s', obj.pid)
                count += 1
        if count:
            # write() instead of the Python-2-only "print >>" chevron form
            sys.stderr.write("Removed %s test object(s) with pidspace %s\n"
                             % (count, settings.FEDORA_PIDSPACE))
コード例 #4
0
ファイル: purge_object.py プロジェクト: abrennr/drlrepo
def purge_item(item_id):
    """Purge the repository object(s) found for pid ``pitt:<item_id>``.

    Each purged pid is reported on standard output.
    """
    repo = Repository()
    pid = 'pitt:%s' % (item_id,)
    for obj in repo.find_objects(pid__contains=pid):
        repo.purge_object(obj.pid)
        # single-argument call form prints identically on Python 2 and 3
        print('%s purged' % (obj.pid,))
コード例 #5
0
def search(request):
    """Search postcards by title and/or description and render the results.

    The search form is bound to ``request.GET``.  When it validates, the
    repository is queried and the matches are paginated (5 per page) into
    ``postcards_paginated``.  Any error raised while querying is logged
    with its traceback, a ``server_error`` message is put in the context
    and the response is returned with HTTP status 500.
    """
    # rough fedora-based postcard search (borrowed heavily from digital masters)
    form = SearchForm(request.GET)
    response_code = None
    context = {'search': form}
    number_of_results = 5
    if form.is_valid():
        # TODO: terms search can't be used with field search
        # -- how to allow a keyword search but restrict to postcards?

        # TEMPORARY: restrict to postcards by relation
        search_opts = {'relation': settings.RELATION}
        # adding wildcards because fedora has a weird notion of what 'contains' means
        for field in ('title', 'description'):
            if field in form.cleaned_data:
                search_opts['%s__contains' % field] = \
                    '%s*' % form.cleaned_data[field].rstrip('*')
        try:
            repo = Repository()
            found = repo.find_objects(type=ImageObject, **search_opts)

            search_paginator = Paginator(list(found), number_of_results)
            try:
                page = int(request.GET.get('page', '1'))
            except ValueError:
                page = 1
            # If page request (9999) is out of range, deliver last page of results.
            try:
                search_page = search_paginator.page(page)
            except (EmptyPage, InvalidPage):
                search_page = search_paginator.page(search_paginator.num_pages)

            context['postcards_paginated'] = search_page
            context['title'] = form.cleaned_data['title']
            context['description'] = form.cleaned_data['description']
        except Exception:
            # Deliberately broad: this is the view boundary and the user gets
            # a friendly message, but record the traceback at ERROR level so
            # the failure is not lost (it used to be logged at DEBUG only).
            logging.exception('Error searching the repository for postcards')
            response_code = 500
            context['server_error'] = (
                'There was an error '
                'contacting the digital repository. This '
                'prevented us from completing your search. If '
                'this problem persists, please alert the '
                'repository administrator.')

    response = render(request, 'postcards/search.html', context)
    if response_code is not None:
        response.status_code = response_code
    return response
コード例 #6
0
def summary(request):
    """Render the postcard summary/about page.

    The template receives every postcard found in the repository, their
    total count, and the interpretation groups ("categories") read from
    the postcard collection object.
    """
    # Search fedora for all postcards; the full list supplies both the total
    # count and the pool the template can pick a postcard from.
    # NOTE: this may be inefficient when all postcards are loaded; consider caching
    found = Repository().find_objects(**postcard_search_opts())
    all_postcards = list(found)
    total = len(all_postcards)

    # get categories from fedora collection object
    collection = PostcardCollection.get()
    template_context = {
        'categories': collection.interp.content.interp_groups,
        'count': total,
        'postcards': all_postcards,
    }
    return render(request, 'postcards/index.html', template_context)
コード例 #7
0
ファイル: testutil.py プロジェクト: jrhoads/eulfedora
    def remove_test_objects(self):
        """Purge leftover test objects and report the count on stdout.

        Every object whose pid matches ``<settings.FEDORA_PIDSPACE>:*`` is
        purged; failures are logged and skipped.

        NOTE: This method expects to be called only when FEDORA_PIDSPACE
        has been switched to a test pidspace.
        """
        # use test fedora credentials if they are set
        repo = Repository(root=getattr(settings, 'FEDORA_TEST_ROOT', None),
                          username=getattr(settings, 'FEDORA_TEST_USER', None),
                          password=getattr(settings, 'FEDORA_TEST_PASSWORD', None))
        test_objects = repo.find_objects(pid__contains='%s:*' % settings.FEDORA_PIDSPACE)
        count = 0
        for obj in test_objects:
            # if objects are unexpectedly not being cleaned up, pid/label may help
            # to isolate which test is creating the leftover objects
            try:
                repo.purge_object(obj.pid, "removing test object")
            except RequestFailed:
                logger.warning('Error purging test object %s', obj.pid)
            else:
                # NOTE: not displaying label because we may not have permission to access it
                logger.info('Purged test object %s', obj.pid)
                count += 1
        if count:
            # single-argument call form prints identically on Python 2 and 3
            print("Removed %s test object(s) with pidspace %s" % (count, settings.FEDORA_PIDSPACE))
コード例 #8
0
class FedoraStorage(base.BaseStorage):
    """Fedora Commons repository storage.

    Documents are stored as Fedora digital objects carrying image, binary,
    thumbnail, OCR script, transcript and ``meta`` datastreams.  Arbitrary
    document metadata is kept in the ``meta`` datastream as ``key=value``
    lines.
    """

    configform = ConfigForm
    defaults = dict(
            root=getattr(settings, "FEDORA_ROOT", ""),
            username=getattr(settings, "FEDORA_USER", ""),
            password=getattr(settings, "FEDORA_PASS", ""),
            namespace=getattr(settings, "FEDORA_PIDSPACE", ""),
            image_name=getattr(settings, "FEDORA_IMAGE_NAME", ""),
            transcript_name=getattr(settings, "FEDORA_TRANSCRIPT_NAME", "")
    )

    def __init__(self, *args, **kwargs):
        """Connect to the repository and build the document object model.

        Required keyword arguments (a missing one raises ``KeyError``):
        ``root``, ``username``, ``password``, ``namespace``,
        ``image_name`` and ``transcript_name``.
        """
        super(FedoraStorage, self).__init__(*args, **kwargs)
        self.namespace = kwargs["namespace"]
        self.image_name = kwargs["image_name"]
        self.thumbnail_name = "THUMBNAIL"
        self.binary_name = "BINARY"
        self.script_name = "OCR_SCRIPT"
        self.transcript_name = kwargs["transcript_name"]

        self.repo = Repository(
                root=kwargs["root"], username=kwargs["username"],
                password=kwargs["password"])

        # The model class is built at runtime because the pidspace and the
        # datastream ids depend on this instance's configuration.
        self.model = type("Document", (DigitalObject,), {
            "default_pidspace": kwargs["namespace"],
            "FILE_CONTENT_MODEL": "info:fedora/genrepo:File-1.0",
            "CONTENT_MODELS":     ["info:fedora/genrepo:File-1.0"],
            "image": FileDatastream(self.image_name, "Document image", defaults={
              'versionable': True,
            }),
            "binary": FileDatastream(self.binary_name, "Document image binary", defaults={
              'versionable': True,
            }),
            "thumbnail": FileDatastream(self.thumbnail_name, "Document image thumbnail", defaults={
              'versionable': True,
            }),
            "script": FileDatastream(self.script_name, "OCR Script", defaults={
                "versionable": True,
            }),
            "transcript": FileDatastream(self.transcript_name, "Document transcript", defaults={
                "versionable": True,
            }),
            "meta": FileDatastream("meta", "Document metadata", defaults={
                "versionable": False,
            }),
        })

    def read_metadata(self, doc):
        """Parse the ``meta`` datastream of ``doc`` into a dict.

        Only lines of the form ``word=value`` are kept, where ``value``
        contains no further ``=``; anything else is silently ignored.
        Returns an empty dict when the datastream has no content.
        """
        meta = doc._doc.meta.content
        if hasattr(meta, "read"):
            meta = meta.read()
        if not meta:
            return {}
        # raw string: "\w" in a plain literal is an invalid escape sequence
        lines = (line.strip() for line in meta.split("\n"))
        return dict(line.split("=") for line in lines
                if re.match(r"^\w+=[^=]+$", line))

    def write_metadata(self, doc, **kwargs):
        """Merge ``kwargs`` into the stored metadata of ``doc``.

        The ``meta`` datastream content is replaced on the in-memory
        object; call :meth:`save_document` to persist it.
        """
        meta = self.read_metadata(doc)
        meta.update(kwargs)
        # items() rather than the Python-2-only iteritems()
        metacontent = [u"%s=%s" % (k, v) for k, v in meta.items()]
        doc._doc.meta.content = "\n".join(metacontent)

    def attr_uri(self, doc, attr):
        """URI of the content of the datastream that backs ``attr``.

        ``attr`` must match one of this storage's ``<attr>_name``
        attributes (e.g. ``image``, ``thumbnail``, ``transcript``).
        """
        return "%sobjects/%s/datastreams/%s/content" % (
                self.repo.fedora_root,
                urllib.quote(doc.pid),
                getattr(self, "%s_name" % attr)
        )

    def document_label(self, doc):
        """Get the document label."""
        return doc._doc.label

    def document_attr_empty(self, doc, attr):
        """Check if document attr is empty (its reported size is 0)."""
        return getattr(doc._doc, attr).info.size == 0

    def document_attr_label(self, doc, attr):
        """Get label for a datastream attribute."""
        return getattr(doc._doc, attr).label

    def document_attr_mimetype(self, doc, attr):
        """Get mimetype for a datastream attribute."""
        return getattr(doc._doc, attr).mimetype

    def document_attr_content_handle(self, doc, attr):
        """Get the content of a datastream attribute.

        Returns an empty ``StringIO`` when the datastream has no content,
        so callers always get a non-None value.
        """
        handle = getattr(doc._doc, attr).content
        return StringIO() if handle is None else handle

    def document_metadata(self, doc):
        """Get document metadata, as parsed from the ``meta`` datastream."""
        return self.read_metadata(doc)

    def _set_document_ds_content(self, doc, dsattr, content):
        """Assign ``content`` to the datastream attribute ``dsattr``."""
        docattr = getattr(doc._doc, dsattr)
        docattr.content = content

    def set_document_attr_content(self, doc, attr, content):
        """Set datastream content."""
        self._set_document_ds_content(doc, attr, content)

    def set_document_attr_mimetype(self, doc, attr, mimetype):
        """Set datastream mimetype."""
        getattr(doc._doc, attr).mimetype = mimetype

    def set_document_attr_label(self, doc, attr, label):
        """Set datastream label."""
        getattr(doc._doc, attr).label = label

    def set_document_label(self, doc, label):
        """Set document label."""
        doc._doc.label = label

    def set_document_metadata(self, doc, **kwargs):
        """Set arbitrary document metadata."""
        # write_metadata only accepts keyword arguments; passing the dict
        # positionally raised TypeError
        self.write_metadata(doc, **kwargs)

    def save_document(self, doc):
        """Save document."""
        doc._doc.save()

    def create_document(self, label):
        """Get a new, not yet saved, document object labelled ``label``."""
        dobj = self.repo.get_object(type=self.model)
        dobj.label = label
        dobj.meta.label = "Document Metadata"
        dobj.meta.mimetype = "text/plain"
        return FedoraDocument(dobj, self)

    def get(self, pid):
        """Get an object by id; returns None if the lookup yields nothing."""
        doc = self.repo.get_object(pid, type=self.model)
        if doc:
            return FedoraDocument(doc, self)
        return None

    def delete(self, doc, msg=None):
        """Delete an object, optionally recording ``msg`` as the log message."""
        self.repo.purge_object(doc.pid, log_message=msg)

    def list(self, namespace=None):
        """List documents in the repository.

        ``namespace`` defaults to the storage's configured namespace.
        """
        ns = namespace if namespace is not None else self.namespace
        return [FedoraDocument(d, self)
                for d in self.repo.find_objects("%s:*" % ns, type=self.model)]

    def list_pids(self, namespace=None):
        """List of pids in ``namespace`` (default: the storage namespace).

        This unfortunately involves calling list(), so it is not a
        quicker alternative.
        """
        # forward the namespace; it used to be silently ignored
        return [doc.pid for doc in self.list(namespace)]
コード例 #9
0
ファイル: test_server.py プロジェクト: jrhoads/eulfedora
 def test_badhostname(self):
     # Searching through a repository rooted at an unresolvable host must
     # raise once the results are consumed.
     self.ingestFixture('object-with-pid.foxml')
     fixture_pid = self.fedora_fixtures_ingested[0]
     unreachable = Repository('http://bogus.host.name.foo:8080/fedora/')
     # presumably find_objects is lazy, so the failure surfaces inside list()
     results = unreachable.find_objects(pid=fixture_pid)
     # TODO: currently just a URLError; make test more specific if we add more specific exceptions
     self.assertRaises(Exception, list, results)
コード例 #10
0
ファイル: test_server.py プロジェクト: jrhoads/eulfedora
 def test_nonssl(self):
     # The ingested fixture must be findable through the non-SSL root.
     self.ingestFixture('object-with-pid.foxml')
     fixture_pid = self.fedora_fixtures_ingested[0]
     plain_repo = Repository(FEDORA_ROOT_NONSSL, FEDORA_USER, FEDORA_PASSWORD)
     matches = [obj for obj in plain_repo.find_objects(pid=fixture_pid)]
     self.assertEqual(1, len(matches))
コード例 #11
0
ファイル: base.py プロジェクト: dasch124/eulfedora
class FedoraTestCase(unittest.TestCase):
    """Base class for tests that run against a live Fedora repository.

    Subclasses may define a ``fixtures`` list of fixture file names; each
    one is ingested in :meth:`setUp` and purged again in :meth:`tearDown`.
    """

    def __init__(self, *args, **kwargs):
        """Connect to the test repository and purge stale test objects."""
        unittest.TestCase.__init__(self, *args, **kwargs)
        self.fedora_fixtures_ingested = []
        self.pidspace = FEDORA_PIDSPACE

        self.repo = Repository(FEDORA_ROOT, FEDORA_USER, FEDORA_PASSWORD)

        # fixture cleanup happens in tearDown, which doesn't always run
        # if a test fails - clean up stale test objects from a previous run here
        stale_objects = list(self.repo.find_objects(pid__contains='%s:*' % self.pidspace))
        if stale_objects:
            logger.info('Removing %d stale test object(s) in pidspace %s',
                        len(stale_objects), self.pidspace)

            for obj in stale_objects:
                try:
                    self.repo.purge_object(obj.pid)
                except RequestFailed as rf:
                    # best effort: a failed purge must not abort test setup
                    logger.warning('Error purging stale test object %s (TestCase init): %s',
                                   obj.pid, rf)

    def setUp(self):
        """Prepare API helpers and ingest any fixtures the subclass declares."""
        # NOTE: queries require RI flush=True or test objects will not show up in RI
        self.repo.risearch.RISEARCH_FLUSH_ON_QUERY = True
        self.opener = self.repo.opener
        self.api = ApiFacade(self.opener)
        for fixture in getattr(self, 'fixtures', []):
            self.ingestFixture(fixture)

    def tearDown(self):
        """Purge every object recorded in ``fedora_fixtures_ingested``."""
        for pid in self.fedora_fixtures_ingested:
            try:
                self.repo.purge_object(pid)
            except RequestFailed as rf:
                # keep purging the remaining objects even if one fails
                logger.warning('Error purging test object %s in tear down: %s',
                               pid, rf)

    def getNextPid(self):
        """Ask the repository for a new pid in this test case's pidspace."""
        pidspace = getattr(self, 'pidspace', None)
        return self.repo.get_next_pid(namespace=pidspace)

    def loadFixtureData(self, fname):
        """Return the fixture content for ``fname``.

        When a pidspace is set, the fixture's pid is replaced with a
        freshly allocated one and the re-serialized XML is returned.
        """
        data = load_fixture_data(fname)
        # if pidspace is specified, get a new pid from fedora and set it as the pid in the xml
        if hasattr(self, 'pidspace'):
            xml = xmlmap.load_xmlobject_from_string(data, _MinimalFoxml)
            xml.pid = self.getNextPid()
            return xml.serialize()
        return data

    def ingestFixture(self, fname):
        """Ingest fixture ``fname`` and remember its pid for cleanup."""
        foxml = self.loadFixtureData(fname)
        pid = self.repo.ingest(foxml)
        if pid:
            # we'd like this always to be true. if ingest fails we should
            # throw an exception. that probably hasn't been thoroughly
            # tested yet, though, so we'll check it until it has been.
            self.append_pid(pid)

    # note: renamed from append_test_pid so that nosetests doesn't
    # autodetect and attempt to run as a unit test.
    def append_pid(self, pid):
        """Record ``pid`` so that :meth:`tearDown` purges it."""
        self.fedora_fixtures_ingested.append(pid)
コード例 #12
0
 def handle(self, *args, **kwargs):
     """Purge every Video object in the repository labelled "DELETE"."""
     # verbosity is looked up but not used by the purge loop below
     verbosity = kwargs.get('verbosity', self.v_normal)
     repository = Repository()
     for duplicate in repository.find_objects(type=Video, label="DELETE"):
         repository.purge_object(duplicate.pid)
コード例 #13
0
ファイル: test_server.py プロジェクト: dasch124/eulfedora
 def test_badhostname(self):
     # Searching through a repository rooted at an unresolvable host must
     # raise once the results are consumed.
     self.ingestFixture('object-with-pid.foxml')
     fixture_pid = self.fedora_fixtures_ingested[0]
     unreachable = Repository('http://bogus.host.name.foo:8080/fedora/')
     # presumably find_objects is lazy, so the failure surfaces inside list()
     results = unreachable.find_objects(pid=fixture_pid)
     # TODO: currently just a URLError; make test more specific if we add more specific exceptions
     self.assertRaises(Exception, list, results)
コード例 #14
0
ファイル: test_server.py プロジェクト: dasch124/eulfedora
 def test_nonssl(self):
     # The ingested fixture must be findable through the non-SSL root.
     self.ingestFixture('object-with-pid.foxml')
     fixture_pid = self.fedora_fixtures_ingested[0]
     plain_repo = Repository(FEDORA_ROOT_NONSSL, FEDORA_USER, FEDORA_PASSWORD)
     matches = [obj for obj in plain_repo.find_objects(pid=fixture_pid)]
     self.assertEqual(1, len(matches))
コード例 #15
0
ファイル: FedoraFs.py プロジェクト: emory-libraries/fedorafs
class FedoraFS(fuse.Fuse):
    """FUSE filesystem backed by a Fedora repository.

    The root directory lists repository objects (at most ``MAX_MEMBERS``);
    everything below an object's directory is delegated to that object's
    ``fs_attr`` / ``fs_members`` / ``fs_read`` / ``fs_write`` methods.
    Writes are buffered per path and sent to the repository on fsync.
    """

    # upper bound on the number of objects loaded for the top-level directory
    MAX_MEMBERS = 150

    def __init__(self, *args, **kw):
        """Set default connection options, then initialize the Fuse base."""
        # fedora-specific configuration parameters
        # these are the defaults; can be overridden with command-line options
        self.base_url = 'http://localhost:8080/fedora/'
        self.username = '******'
        self.password = '******'
        self.filter = None

        # path -> buffer of data written but not yet fsync'ed
        self.towrite = {}

        fuse.Fuse.__init__(self, *args, **kw)
        self._members = None
        self.files = {}
        self.lastfiles = {}
        self.objects = {}

    def main(self, args=None):
        """Connect to the repository, then run the FUSE main loop."""
        # initialize fedora connection AFTER command line options have been parsed

        # eulcore.fedora.server requires base_url with a trailing slash
        if not self.base_url.endswith('/'):
            self.base_url = '%s/' % self.base_url

        # if there is a username and an *empty* password, prompt user
        if self.username and not self.password:
            self.password = getpass()
        # TODO: support netrc for credentials?
        # TODO: catch invalid credentials!! (don't retry if invalid)

        self.repo = Repository(self.base_url, self.username, self.password)
        self.repo.default_object_type = FsObject
        fuse.Fuse.main(self, args)

    @property
    def members(self):
        """Dict of pid -> object for the top-level directory.

        Populated on first access from a repository search using
        ``self.filter`` and capped at ``MAX_MEMBERS`` entries.
        """
        if self._members is None:
            # only search if pid list has not already been populated
            found = self.repo.find_objects(terms=self.filter)
            members = {}
            # Iterate instead of calling next() a fixed number of times, so
            # a search with fewer than MAX_MEMBERS results simply ends
            # rather than raising StopIteration.
            for obj in (found or ()):
                members[obj.pid] = obj
                if len(members) >= self.MAX_MEMBERS:
                    break
            self._members = members
        return self._members

    def getattr(self, path):
        """Return stat information for ``path``, or ``-ENOENT`` if unknown."""
        path_els = path.strip('/').split('/')
        logger.debug('getattr for path %s - path elements are %s', path, path_els)

        if path == '/':
            st = FsStat()    # access time defaults to now
            st.st_mode = stat.S_IFDIR | 0o755
            # for a directory, number of links should be subdirs + 2
            # make sure pid list is up-to-date before calculating
            members = self.members
            logger.debug('members are %s, count is %s', members, len(members))
            st.st_nlink = 2 + len(members)
            return st

        if path_els[0] in self.members:
            return self.members[path_els[0]].fs_attr(*path_els[1:])
        # NOTE(review): this used to return None; a negative errno is the
        # usual way for a handler to report a missing path -- confirm
        # against the python-fuse version in use.
        return -errno.ENOENT

    def readdir(self, path, offset):
        """Yield a ``fuse.Direntry`` for each entry in directory ``path``."""
        dir_entries = ['.', '..']

        # FIXME: howto use offset for large directories ?
        path_els = path.strip('/').split('/')
        logger.debug('readdir for path %s, offset %s - path elements are %s',
                     path, offset, path_els)

        if path == '/':        # if root
            dir_entries.extend(obj.fs_name() for obj in self.members.values())
        elif path_els[0] in self.members:
            dir_entries.extend(self.members[path_els[0]].fs_members(*path_els[1:]))

        logger.debug('dir entries for %s are %s', path, dir_entries)
        for entry in dir_entries:
            yield fuse.Direntry(entry)

    def read(self, path, size, offset):
        """Return up to ``size`` bytes of ``path`` starting at ``offset``.

        Returns ``-ENOENT`` when the first path element is not a known
        member (this used to fail on an unbound local variable).
        """
        logger.debug('read path=%s, size=%s, offset=%s', path, size, offset)
        path_els = path.strip('/').split('/')

        if path_els[0] not in self.members:
            return -errno.ENOENT

        # FIXME: not very efficient
        content = self.members[path_els[0]].fs_read(*path_els[1:])
        if offset >= len(content):
            return ''
        # slicing clamps the end to the available length
        return content[offset:offset + size]

    def write(self, path, buf, offset):
        """Buffer ``buf`` for ``path`` at ``offset``; flushed by fsync.

        Returns the number of bytes accepted, or ``-ENOSYS`` when the path
        is not a writable datastream or a new write starts mid-file.
        """
        path_els = path.strip('/').split('/')
        logger.debug('write: offset %s, buf: %s', offset, buf)

        # currently only expect to be writing datastreams
        writable = (
            len(path_els) == 2 and path_els[0] in self.members and
            path_els[1] in self.members[path_els[0]].fs_members(writable_only=True))
        if not writable:
            # attempting to write something we don't handle
            return -errno.ENOSYS

        if path not in self.towrite:
            if offset != 0:
                return -errno.ENOSYS    # don't allow starting to write a file in the middle
            self.towrite[path] = MutableString()

        # The slice must end at offset + len(buf).  The old end (len(buf))
        # left earlier data after the insertion point from the third
        # sequential chunk onward, corrupting the buffer.
        self.towrite[path][offset:offset + len(buf)] = buf
        return len(buf)

    def fsync(self, path, isfsyncfile):
        """Send any buffered writes for ``path`` to the repository."""
        # FIXME: what does isfsyncfile do ?
        # actually write the contents to fedora

        # assuming if there is data to write, file should be written (?)
        if path in self.towrite:
            contents = str(self.towrite[path])
            logger.debug('fsync contents: %s', contents)
            if contents:
                path_els = path.strip('/').split('/')
                # stuff should only get in towrite if it was determined to be writable
                if self.members[path_els[0]].fs_write(path_els[1], contents):
                    logger.debug('%s updated successfully', path)

            del self.towrite[path]

        return 0

    # The operations below are accepted but do nothing; each one simply
    # reports success (0) to the caller.

    def mknod(self, path, mode, dev):
        return 0

    def unlink(self, path):
        return 0

    def release(self, path, flags):
        return 0

    def open(self, path, flags):
        return 0

    def truncate(self, path, size):
        return 0

    def utime(self, path, times):
        return 0

    def mkdir(self, path, mode):
        return 0

    def rmdir(self, path):
        return 0

    def rename(self, pathfrom, pathto):
        return 0

    def readlink(self, path):
        """Resolve a symlink to the top-level entry named by its last element."""
        logger.debug('readlink path=%s', path)
        # for now, the only symlink in use is /pid/relation/pid
        # FIXME: can we shift this logic to the model somehow?
        pid = path.split('/')[-1]    # last element is pid - link to top-level pid entry
        return "../../" + pid

    def fuseoptref(self):
        """Return a ``FuseArgs`` describing the fedora-specific options."""
        fuse_args = fuse.FuseArgs()
        # was "fuse.args.optlist", which never touched the object returned
        # NOTE(review): confirm optlist (rather than optdict) is the
        # attribute python-fuse expects to hold this mapping.
        fuse_args.optlist = {"url": self.base_url,
                             "username": self.username,
                             "password": self.password,
                             "filter":  self.filter,
                             }
        return fuse_args