Example #1
0
class DatastreamXml():
    """Read and replace XML datastreams on a single repository object.

    The object identified by ``pid`` is fetched as soon as the instance is
    constructed and kept on ``self.digital_object``.
    """

    def __init__(self, pid, repocls=None, server="Development"):
        """Bind to a repository and load the object for ``pid``.

        :param pid: identifier of the object to work with
        :param repocls: an already configured repository instance; when it
            is omitted (or falsy) a new ``Repository`` is built from the
            values returned by ``repo.Get_Configs(server)``
        :param server: configuration name handed to ``repo.Get_Configs``
        """
        self.repo = repocls
        # Test the attribute that was just assigned; the bare name ``repo``
        # is not a local here, so testing it never reflected ``repocls``.
        if not self.repo:
            # NOTE(review): ``repo`` is presumably a module-level
            # configuration helper imported elsewhere in the file -- confirm.
            username, password, root = repo.Get_Configs(server)
            self.repo = Repository(root=root,
                                   username=username,
                                   password=password)
        self.pid = pid
        self.GetObject()

    def GetObject(self):
        """Fetch the object for ``self.pid`` into ``self.digital_object``."""
        self.digital_object = self.repo.get_object(self.pid)

    def ReplaceDs(self, dsid, xml_path):
        """Replace the content of datastream ``dsid`` with the XML file.

        :param dsid: id of the datastream to overwrite
        :param xml_path: path of the XML file loaded as the new content

        The datastream label is set to the pid (colon removed) and the
        datastream id joined by an underscore.
        """
        self.dsid = dsid
        self.xml_path = xml_path
        xml_object = self._MakeXmlObject()
        digital_object = self.repo.get_object(self.pid)
        datastream = DatastreamObject(digital_object, self.dsid)
        datastream.content = xml_object
        # str.join takes ONE iterable; label and save must target the
        # datastream object that was just given its new content.
        datastream.label = "_".join([self.pid.replace(":", ""), dsid])
        datastream.save()

    def _MakeXmlObject(self):
        """Load ``self.xml_path`` as an xmlmap XML object."""
        return xmlmap.load_xmlobject_from_file(self.xml_path)

    def GetMarcDs(self):
        """Return the ``MARCXML`` datastream parsed as an etree element.

        The datastream object, its serialized content and the parsed tree
        are also stored on the instance (``marcxml_object``,
        ``marcxml_content``, ``marc_tree``).
        """
        self.marcxml_object = self.digital_object.getDatastreamObject(
            "MARCXML")
        self.marcxml_content = self.marcxml_object.content.serialize()
        self.marc_tree = etree.fromstring(self.marcxml_content)
        return self.marc_tree
Example #2
0
class PdfToTextTest(unittest.TestCase):
    """Tests for ``pdf_to_text`` against a local file and a Fedora datastream.

    ``setUp`` ingests the fixture PDF into the configured Fedora repository
    and ``tearDown`` purges it again, so a reachable repository is required.
    """
    fixture_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'fixtures')
    pdf_filepath = os.path.join(fixture_dir, 'test.pdf')
    # text the fixture PDF is expected to yield
    pdf_text = 'This is a short PDF document to use for testing.'

    def setUp(self):
        """Create a test object in Fedora with the fixture PDF as content."""
        self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                               settings.FEDORA_PASSWORD)
        # PDF content is binary: open in 'rb' so no newline translation or
        # text decoding is applied before the bytes are uploaded.
        with open(self.pdf_filepath, 'rb') as pdf:
            self.pdfobj = self.repo.get_object(type=TestPdfObject)
            self.pdfobj.label = 'eulindexer test pdf object'
            self.pdfobj.pdf.content = pdf
            # save while the file is still open so its content can be read
            self.pdfobj.save()

    def tearDown(self):
        """Remove the object created in ``setUp``."""
        self.repo.purge_object(self.pdfobj.pid)

    def test_file(self):
        # extract text from a pdf from a file on the local filesystem
        # (context manager so the handle is closed when the test ends)
        with open(self.pdf_filepath, 'rb') as pdf:
            text = pdf_to_text(pdf)
        self.assertEqual(self.pdf_text, text)

    def test_object_datastream(self):
        # extract text from a pdf datastream in fedora
        pdfobj = self.repo.get_object(self.pdfobj.pid, type=TestPdfObject)
        text = pdf_to_text(pdfobj.pdf.content)
        self.assertEqual(self.pdf_text, text)
Example #3
0
class PdfToTextTest(unittest.TestCase):
    """Exercise ``pdf_to_text`` on a local PDF and on a Fedora datastream.

    Each test runs against a freshly ingested Fedora object that is purged
    afterwards, so a reachable repository is required.
    """
    fixture_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'fixtures')
    pdf_filepath = os.path.join(fixture_dir, 'test.pdf')
    # text the fixture PDF is expected to yield
    pdf_text = 'This is a short PDF document to use for testing.'

    def setUp(self):
        """Ingest the fixture PDF into a new Fedora test object."""
        self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                               settings.FEDORA_PASSWORD)
        # binary mode: a PDF must be uploaded byte-for-byte, without any
        # newline translation or text decoding
        with open(self.pdf_filepath, 'rb') as pdf:
            self.pdfobj = self.repo.get_object(type=TestPdfObject)
            self.pdfobj.label = 'eulindexer test pdf object'
            self.pdfobj.pdf.content = pdf
            # the save happens inside the block, while the file is open
            self.pdfobj.save()

    def tearDown(self):
        """Purge the Fedora object created for the test."""
        self.repo.purge_object(self.pdfobj.pid)

    def test_file(self):
        # extract text from a pdf from a file on the local filesystem;
        # the with-block closes the handle instead of leaking it
        with open(self.pdf_filepath, 'rb') as pdf:
            text = pdf_to_text(pdf)
        self.assertEqual(self.pdf_text, text)

    def test_object_datastream(self):
        # extract text from a pdf datastream in fedora
        pdfobj = self.repo.get_object(self.pdfobj.pid, type=TestPdfObject)
        text = pdf_to_text(pdfobj.pdf.content)
        self.assertEqual(self.pdf_text, text)
Example #4
0
    def process_items(self):
        """Create a bag for every requested item id that DigWF resolves uniquely.

        For each id in ``self.options.item_ids`` the DigWF API is queried and
        the Fedora URL is probed with a HEAD request.  Items with zero or
        several matches, and items for which any of the lookups fail, are
        reported on stdout and skipped.  For the remaining items a bag is
        created under ``self.options.output`` and its location is printed.
        """
        digwf_client = Client(self.options.digwf_url)
        fedora = Repository(self.options.fedora_url)

        for item_id in self.options.item_ids:
            try:
                lookup = digwf_client.get_items(item_id=item_id)
            except requests.exceptions.HTTPError as err:
                print('Domokun Connection Error! Unable to query DigWF REST API for %s: %s' % (
                    item_id, err))
                continue

            try:
                # only reachability matters; the response itself is unused
                requests.head(self.options.fedora_url)
            except requests.ConnectionError:
                print('Fedora Connection Error! Unable to query Fedora REST API')
                continue

            match_count = lookup.count
            if match_count == 0:
                print('No item found for this item id %s' % item_id)
                continue
            if match_count != 1:
                # shouldn't get more than one match when looking up by
                # item id, but just in case
                print('Error! DigWF returned %d matches for this item id %s' %
                      (match_count, item_id))
                continue

            # exactly one match from here on
            item = lookup.items[0]
            print('Found item %s (pid %s, control key %s, marc %s)' %
                  (item_id, item.pid or '-', item.control_key,
                   item.marc_path))
            try:
                fedora.get_object(pid=item.pid)
            except requests.exceptions.HTTPError as err:
                print('Fedora Connection Error! Unable to query Fedora REST API for %s: %s' % (
                    item.pid, err))
                continue

            # create_bag returns the bagit bag object
            bag = LsdiBaggee(item, fedora).create_bag(self.options.output)

            print('Bag created at %s' % bag)
Example #5
0
    def process(self, input):
        """
        Encode the input array as an image and store it in a Fedora datastream.

        The image is written to an in-memory buffer in the format named by
        the "format" parameter; the buffer, a fixed label and an
        ``image/<format>`` mimetype are set on the ``DATASTREAM`` of the
        object named by the "pid" parameter, which is then saved.

        Returns None when input is None, otherwise the input unchanged.
        Raises NodeError when encoding the image fails with an IOError.
        """
        if input is None:
            return
        # (an environment switch, NODETREE_WRITE_FILEOUT, used to allow
        # skipping the write; that check is currently disabled)

        params = self._params
        fedora = Repository(params.get("url"), params.get("username"),
                params.get("password"))
        try:
            image_buffer = StringIO()
            Image.fromarray(input).save(image_buffer, params.get("format").upper())
        except IOError:
            raise exceptions.NodeError(
                    "Error obtaining image buffer in format: %s" %
                        params.get("format").upper(), self)

        proxy_type = get_fedora_proxy_class(params.get("dsid"))
        target = fedora.get_object(params.get("pid"), type=proxy_type)
        # NOTE(review): the buffer is handed over without being rewound;
        # confirm the datastream reads it from the start.
        target.DATASTREAM.content = image_buffer
        target.DATASTREAM.label = "Test Ingest Datastream 1"
        target.DATASTREAM.mimetype = "image/%s" % params.get("format")
        target.save()
        return input
Example #6
0
def volume_modified(request, pid):
    '''Last modification time for a single volume.

    Combines the newest Solr index timestamp for the volume with, for
    authenticated users, the creation time of the newest annotation visible
    to that user; both are handed to ``solrtimestamp_or_datetime``.
    '''
    volume_query = solr_interface().query(
        content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid)
    # NOTE: the solr indexing timestamp is used instead of the object's last
    # modified time: an index change means the object may have been modified,
    # and the volume's index timestamp is updated when pages are added
    timestamps = volume_query.sort_by('-timestamp').field_limit('timestamp')

    # for a logged-in user the page should show as modified whenever the
    # annotation count changes
    newest_annotation = None
    if request.user.is_authenticated():
        # NOTE: initialising the volume should be cheap; it is only used to
        # get the volume uri and its associated annotations
        volume = Repository().get_object(pid, type=Volume)
        visible_notes = volume.annotations().visible_to(request.user)
        newest_annotation = visible_notes.last_created_time()

    if timestamps.count():
        indexed_at = timestamps[0]['timestamp']
    else:
        indexed_at = None
    return solrtimestamp_or_datetime(indexed_at, newest_annotation)
Example #7
0
def main(argv):
    """Link collection members that have no edition yet to their edition URI.

    A SPARQL query collects every member of the ``dimenovels:fame``
    collection that has no ``dime:IsCopyOf`` relation.  For each of them the
    series number is read from Solr, mapped to an edition URI through the
    editions CSV (first column -> second column), and stored on the object as
    an ``IsCopyOf`` relationship.  Failures are reported per pid and do not
    stop the run.

    :param argv: command line arguments (not used)
    """
    pids = []
    s = solr.SolrConnection('%s/solr' % HOST)
    # Pass the credentials straight through: a format string without a
    # placeholder raises TypeError as soon as it is applied to a value.
    repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)
    results = repo.risearch.sparql_query('PREFIX dime: <http://dimenovels.org/ontology#> select ?pid where {?pid <fedora-rels-ext:isMemberOfCollection> <info:fedora/dimenovels:fame> . OPTIONAL { ?pid dime:IsCopyOf ?copy } FILTER (! BOUND(?copy)) }')
    for row in results:
        for v in row.values():
            pids.append(v.replace('info:fedora/', ''))

    with open('C:/Users/a1691506/Desktop/ffw_editions.csv', mode='r') as infile:
        reader = csv.reader(infile)
        editionDict = {rows[0]:rows[1] for rows in reader}

    for p in pids:

        print("Processing %s" % p)

        try:
            response = s.query('PID:"%s"' % p)

            # Reset for every pid: when Solr returns no hit the number from
            # the previous pid must not be reused (the lookup below then
            # fails and the pid is reported instead).
            number = None
            for hit in response.results:
                number = hit['mods_series_number_ms'][0].split(' ')[1]

            editionUri = editionDict[number]

            obj = repo.get_object(p)
            obj.add_relationship('http://dimenovels.org/ontology#IsCopyOf', editionUri)
            obj.save()
        except Exception:
            # best effort per object; ``Exception`` (not a bare except) so
            # that Ctrl-C and SystemExit still stop the script
            print("%s failed. Check it!" % p)
Example #8
0
def download_file(pid, dsid):
    """Download one datastream to a temporary file, showing a progress bar.

    :param pid: pid of the object that owns the datastream
    :param dsid: id of the datastream to download

    The content is written chunk by chunk to a named temporary file that is
    kept on disk (``delete=False``); its name is printed before the download
    starts.  Any error raised while downloading propagates to the caller.
    """
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL, testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    widgets = ['Download: ', progressbar.widgets.Percentage(), ' ',
               progressbar.widgets.Bar(), ' ', progressbar.widgets.ETA(),
               ' ', progressbar.widgets.FileTransferSpeed()]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    # download content to a tempfile; the with-block closes (and flushes) it
    # even when the download fails part-way
    with tempfile.NamedTemporaryFile(
            prefix='%s-%s_' % (pid, dsid), delete=False) as tmpfile:
        print('writing to ', tmpfile.name)
        size_read = 0
        for chunk in ds.get_chunked_content():
            size_read += len(chunk)
            pbar.update(size_read)
            tmpfile.write(chunk)
    # only reached when the whole datastream was read
    pbar.finish()
class Command(BaseCommand):
    ''' This command run through all the articles and makes sure that journal titles and publishers match against Sherpa Romeo
    '''
    # NOTE(review): the docstring above is reused verbatim as the command's
    # help text, so it is left untouched here -- but it does not describe
    # what handle() does: every existing article is attached to the
    # 'oe-collection' object and an ARK URI is appended to its DC
    # identifiers.  Confirm and reword.
    args = "[netid netid ...]"
    help = __doc__

    # NOTE(review): --noact is declared but never read by handle(), and its
    # help text looks copied from another command -- confirm intended use.
    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Fixed all caps title in articles'),
        )

    def handle(self, *args, **options):
        '''Attach every existing article to the collection and add its ARK.

        Articles are loaded page by page (100 per page).  A page or article
        that fails is reported through ``output`` and skipped; a failure to
        set up pagination ends the command.
        '''

        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        #connection to repository
        self.repo = Repository(settings.FEDORA_ROOT, username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_PASSWORD)
        pid_set = self.repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, type=Publication)
        coll = self.repo.get_object(pid=settings.PID_ALIASES['oe-collection'])
        try:
            articles = Paginator(pid_set, 100)
        except Exception as e:
            self.output(0, "Error paginating items: : %s " % e)
            # without a paginator there is nothing to iterate over; falling
            # through would only fail on the unbound name below
            return

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e))
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(0, "Skipping %s because pid does not exist" % article.pid)
                        continue

                    print(coll)
                    print(article.pid)
                    article.collection = coll
                    # the ARK is built from the part of the pid after the colon
                    ark_uri = '%sark:/25593/%s' % (settings.PIDMAN_HOST, article.pid.split(':')[1])
                    article.dc.content.identifier_list.extend([ark_uri])
                    article.save()

                except Exception as e:
                    # format the exception itself: ``e.message`` is
                    # deprecated and empty for multi-argument exceptions
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e))
                    # self.counts['errors'] +=1


    def output(self, v, msg):
        '''simple function to handle logging output based on verbosity'''
        if self.verbosity >= v:
            self.stdout.write("%s\n" % msg)
Example #10
0
def datastream_etag(request, pid, dsid, type=None, repo=None, accept_range_request=False, **kwargs):
    """ETag callback for :class:`django.views.decorators.http.condition`.

    Accepts the same arguments as :meth:`~eulfedora.views.raw_datastream`.
    Returns the datastream checksum, or None when a partial range is
    requested, the datastream is missing, checksums are disabled, or the
    repository request fails.
    """

    # a range request gets no etag, unless the range is the literal
    # "bytes=1-" (which the check below lets through)
    if accept_range_request:
        requested_range = request.META.get("HTTP_RANGE", None)
        if requested_range and requested_range != "bytes=1-":
            return None

    try:
        if repo is None:
            repo = Repository()
        # only pass ``type`` on when the caller supplied one
        lookup_opts = {} if type is None else {"type": type}
        datastream = repo.get_object(pid, **lookup_opts).getDatastreamObject(dsid)
        if not (datastream and datastream.exists):
            return None
        if datastream.checksum_type == "DISABLED":
            return None
        return datastream.checksum
    except RequestFailed:
        return None
Example #11
0
def datastream_etag(request,
                    pid,
                    dsid,
                    type=None,
                    repo=None,
                    accept_range_request=False,
                    **kwargs):
    '''Compute an etag for a datastream, for use with
    :class:`django.views.decorators.http.condition`.  The signature mirrors
    :meth:`~eulfedora.views.raw_datastream`.

    The etag is the datastream checksum; None is returned when there is no
    usable checksum or when a partial range was requested.
    '''

    # no etag for range requests, except for the literal 'bytes=1-'
    range_header = accept_range_request and \
        request.META.get('HTTP_RANGE', None)
    if range_header and range_header != 'bytes=1-':
        return None

    etag = None
    try:
        if repo is None:
            repo = Repository()
        obj_kwargs = {}
        if type is not None:
            obj_kwargs['type'] = type
        digital_obj = repo.get_object(pid, **obj_kwargs)
        dsobj = digital_obj.getDatastreamObject(dsid)
        has_checksum = dsobj and dsobj.exists and \
            dsobj.checksum_type != 'DISABLED'
        if has_checksum:
            etag = dsobj.checksum
    except RequestFailed:
        # a failed repository request simply means "no etag"
        pass

    return etag
Example #12
0
def postcard_image(request, pid, size):
    '''Redirect to the postcard image in the requested size.

    :param pid: postcard object pid
    :param size: size to return, one of thumbnail, medium, or large
    :returns: a permanent redirect to the image url for that size
    :raises Http404: when the size is not recognised, the object does not
        exist, or the repository request fails
    '''

    # NOTE: formerly this served out actual image content, via
    # fedora dissemination & djatoka
    # Images now use an IIIF image server; adding redirects here
    # for the benefit of search engines or indexes referencing
    # the old urls

    # attribute on the image object that holds the url for each size
    url_attribute = {
        'thumbnail': 'thumbnail_url',
        'medium': 'medium_img_url',
        'large': 'large_img_url',
    }.get(size)
    if url_attribute is None:
        # an unknown size used to fall through with ``url`` unbound and
        # crash with UnboundLocalError; it is a client error, so 404
        raise Http404

    try:
        repo = Repository()
        obj = repo.get_object(pid, type=ImageObject)
        if not obj.exists:
            raise Http404

        return HttpResponsePermanentRedirect(getattr(obj, url_attribute))

    except RequestFailed:
        raise Http404
Example #13
0
    def handle(self, *pids, **options):
        """Point page ARKs that still target the old page urls at the current ones.

        The pid manager is searched for ARKs whose target starts with the
        site's old ``/books/pages/`` url.  For each result the Fedora pid is
        taken from the end of the target uri; when that page exists the ARK
        target is updated to the page's absolute url.  Counts of updated,
        missing and failed pages are written to stderr at the end.  An
        interrupt stops the loop after the current item.
        """
        # bind a handler for interrupt signal
        signal.signal(signal.SIGINT, self.interrupt_handler)

        verbosity = int(options.get('verbosity', self.v_normal))

        repo = Repository()
        try:
            pidman = DjangoPidmanRestClient()
        except Exception as err:
            # error if pid manager config options not in localsettings
            raise CommandError(err)

        old_page_target = '%s/books/pages/' % Site.objects.get_current().domain
        search_args = dict(type='ark', target=old_page_target, count=10)
        # a small first request is enough to learn the total ...
        total = pidman.search_pids(**search_args)['results_count']
        # ... actual processing then uses a larger page size
        search_args['count'] = 100
        if verbosity >= self.v_normal:
            print('Found %d total page ARKs with targets to be updated' % total)

        progress_widgets = [Percentage(), ' (', Counter(), ')', Bar(), ETA()]
        pbar = ProgressBar(widgets=progress_widgets, maxval=total).start()

        self.stats = defaultdict(int)
        self.processed = set()
        for ark in self.get_search_results(pidman, search_args):
            noid = ark['pid']
            self.processed.add(noid)
            # the fedora pid is the last path segment of the target uri
            target_uri = ark['targets'][0]['target_uri']
            _, page_pid = target_uri.rstrip('/').rsplit('/', 1)
            try:
                page = repo.get_object(page_pid, type=Page)
                if page.exists:
                    # check if volume exists?
                    pidman.update_ark_target(noid, target_uri=page.absolute_url)
                    self.stats['updated'] += 1
                else:
                    # this should probably only happen in dev/qa
                    if verbosity > self.v_normal:
                        self.stderr.write('Page %s does not exist' % page_pid)
                    self.stats['notfound'] += 1
            except RequestFailed as rf:
                print('Error accessing %s: %s' % (page_pid, rf))
                self.stats['error'] += 1

            pbar.update(len(self.processed))
            if self.interrupted:
                break

        if not self.interrupted:
            pbar.finish()
        # summarize
        summary = 'Updated %(updated)d, %(error)d error(s), %(notfound)d not found' \
            % self.stats
        self.stderr.write(summary)
Example #14
0
    def process(self, input):
        """
        Store the input image array in a Fedora datastream.

        The array is encoded in the format named by the "format" parameter
        into an in-memory buffer, which becomes the content of the
        ``DATASTREAM`` of the object named by the "pid" parameter.

        Returns None for a None input, otherwise the input unchanged;
        raises NodeError if encoding fails with an IOError.
        """
        if input is None:
            return
        # (a NODETREE_WRITE_FILEOUT environment switch once allowed
        # skipping the write; that check is disabled)

        param = self._params.get
        repository = Repository(param("url"),
                                param("username"),
                                param("password"))
        try:
            encoded = StringIO()
            Image.fromarray(input).save(encoded,
                                        param("format").upper())
        except IOError:
            raise exceptions.NodeError(
                "Error obtaining image buffer in format: %s" %
                param("format").upper(), self)

        proxy_cls = get_fedora_proxy_class(param("dsid"))
        fedora_obj = repository.get_object(param("pid"), type=proxy_cls)
        # NOTE(review): the buffer is not rewound before it is handed
        # over; confirm the datastream reads it from the beginning.
        fedora_obj.DATASTREAM.content = encoded
        fedora_obj.DATASTREAM.label = "Test Ingest Datastream 1"
        fedora_obj.DATASTREAM.mimetype = "image/%s" % param("format")
        fedora_obj.save()
        return input
Example #15
0
class VolumePageList(ListView, VaryOnCookieMixin):
    '''Display a paginated list of :class:`~readux.books.models.Page`
    objects associated with a single :class:`~readux.books.models.Volume`.
    Pages are displayed by thumbnail; thumbnails include an annotation count
    indicator for logged in users with annotations.
    '''

    template_name = 'books/volume_pages_list.html'
    paginate_by = 30
    context_object_name = 'pages'

    @method_decorator(last_modified(view_helpers.volume_pages_modified))
    def dispatch(self, *args, **kwargs):
        '''Dispatch as usual, with last-modified handling based on
        ``view_helpers.volume_pages_modified``.'''
        return super(VolumePageList, self).dispatch(*args, **kwargs)

    def get_queryset(self):
        '''Return the solr pages of the volume named by the ``pid`` url
        argument; 404 if the object is missing or is not a volume.'''
        self.repo = Repository(request=self.request)
        # store the volume for use in get_context_data
        self.vol = self.repo.get_object(self.kwargs['pid'], type=Volume)
        if not self.vol.exists or not self.vol.is_a_volume:
            raise Http404
        return self.vol.find_solr_pages()

    def get_context_data(self, **kwargs):
        '''Add the volume, a search form, per-page annotation counts for
        the logged-in user and the page layout to the template context.'''
        # forward the keyword arguments so that values supplied by a caller
        # are not silently dropped
        context_data = super(VolumePageList, self).get_context_data(**kwargs)

        context_data.update({
            'vol': self.vol,
            'form': BookSearch(), # form for searching in this book
        })

        # if user is authenticated, check for annotations on this volume
        if self.request.user.is_authenticated():
            notes = self.vol.page_annotation_count(self.request.user)
            # method returns a dict for easy lookup;
            # strip out base site url for easy lookup in the template
            # (need leading / left to match item urls)
            domain = get_current_site(self.request).domain.rstrip('/')
            if not domain.startswith('https'):
                domain = 'https://' + domain

            annotated_pages = {k.replace(domain, ''): v
                               for k, v in notes.items()}
        else:
            annotated_pages = {}
        context_data.update({
            'annotated_pages': annotated_pages,
            'annotation_search_enabled': bool(annotated_pages)
        })

        # Check if the first page of the volume is wider than it is tall
        # to set the layout of the pages; a volume without any pages keeps
        # the default layout instead of failing on the index lookup
        try:
            first_page = self.vol.pages[0]
        except IndexError:
            first_page = None
        if first_page is not None and first_page.width > first_page.height:
            layout = 'landscape'
        else:
            layout = 'default'
        context_data['layout'] = layout

        return context_data
Example #16
0
def volume_modified(request, pid):
    '''Last modification time for a single volume.

    Passes the volume's newest Solr index timestamp and, for a logged-in
    user, the newest visible annotation creation time to
    ``solrtimestamp_or_datetime`` and returns the result.
    '''
    solr = solr_interface()
    # NOTE: the solr indexing timestamp stands in for the object's last
    # modified time; a changed index means the object may have changed, and
    # the volume's index timestamp is updated when pages are added
    hits = solr.query(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid) \
               .sort_by('-timestamp') \
               .field_limit('timestamp')

    # a logged-in user should see the page as modified when the annotation
    # count changes
    user = request.user
    latest_note = None
    if user.is_authenticated():
        # NOTE: initialising the volume should not be expensive; it is only
        # used for the volume uri and its associated annotations
        annotations = Repository().get_object(pid, type=Volume).annotations()
        # newest annotation creation for pages in this volume
        latest_note = annotations.visible_to(user).last_created_time()

    solrtime = None
    if hits.count():
        solrtime = hits[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Example #17
0
def download_file(pid, dsid):
    """Download a datastream into a temporary file while showing progress.

    :param pid: pid of the object that owns the datastream
    :param dsid: id of the datastream to download

    Chunks are appended to a named temporary file that stays on disk
    (``delete=False``); the file name is printed first.  Errors raised
    during the download propagate to the caller.
    """
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    widgets = [
        'Download: ',
        progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.Bar(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.FileTransferSpeed()
    ]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    # download content to a tempfile; using it as a context manager makes
    # sure it is flushed and closed even if the download fails part-way
    with tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                     delete=False) as tmpfile:
        print('writing to ', tmpfile.name)
        size_read = 0
        for chunk in ds.get_chunked_content():
            size_read += len(chunk)
            pbar.update(size_read)
            tmpfile.write(chunk)
    # only reached after a complete download
    pbar.finish()
Example #18
0
    def get(self, request):
        """Answer an unAPI request.

        Query parameters read from the request:

        - no ``id``: list the formats available for all items
        - ``id`` only: list the formats available for that item
        - ``id`` and ``format``: return the item in that format, or an
          empty 406 response when the item does not offer the format
        """
        context = {}
        item_id = request.GET.get('id', None)
        fmt = request.GET.get('format', None)
        if item_id is not None:
            context['id'] = item_id
            repo = Repository(request=self.request)
            # generalized class-based view would need probably a get-item method
            # for repo objects, could use type-inferring repo variant
            obj = repo.get_object(item_id, type=Volume)

            formats = obj.unapi_formats

            if fmt is None:
                # display formats for this item
                context['formats'] = formats
            else:
                try:
                    current_format = formats[fmt]
                except KeyError:
                    # the format comes straight from the query string; an
                    # unknown value is a client error (unAPI uses 406 Not
                    # Acceptable for it), not a server crash
                    return HttpResponse(status=406)
                # return requested format for this item
                meth = getattr(obj, current_format['method'])
                return HttpResponse(meth(), content_type=current_format['type'])

        else:
            # display formats for all items
            # NOTE: if multiple classes, should be able to combine the formats
            context['formats'] = Volume.unapi_formats

        # NOTE: doesn't really even need to be a template, could be generated
        # with eulxml just as easily if that simplifies reuse
        return render(request, 'books/unapi_format.xml', context,
            content_type='application/xml')
Example #19
0
def main(argv):
    """Write a CSV with the size of every repository object and a running total.

    Objects are listed oldest first through a SPARQL query on their created
    date.  For each object the sizes of all its datastreams are summed; one
    row ``pid, created date, object size, cumulative size`` is written and
    the object size is also printed.

    :param argv: command line arguments (not used)
    """
    # the with-block guarantees the report is flushed and closed, also when
    # the run is aborted half-way
    with open("C:/Users/a1691506/Desktop/repo_size.csv", 'wb') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # Pass the credentials straight through: a format string without a
        # placeholder raises TypeError as soon as it is applied to a value.
        repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)
        risearch = repo.risearch
        query = 'select ?pid ?date where {?pid <fedora-model:hasModel> <info:fedora/fedora-system:FedoraObject-3.0> ; <fedora-model:createdDate> ?date . } ORDER BY ASC(?date)'

        pids = risearch.find_statements(query, language='sparql', type='tuples', flush=None)

        repo_size = 0

        for dictionary in pids:

            pid = dictionary['pid'].replace('info:fedora/', '')
            dateCreated = dictionary['date']

            obj = repo.get_object(pid)
            total_size = sum(obj.getDatastreamObject(dsid).size
                             for dsid in obj.ds_list)
            repo_size += total_size

            print("Total size for %s: %s" % (pid, total_size))

            csvwriter.writerow([pid, dateCreated, total_size, repo_size])
def main(argv):
    """Purge the ``PDF`` datastream from every page object that has one.

    A SPARQL query selects the objects with the ``islandora:pageCModel``
    content model that disseminate a PDF; for each of them the ``PDF``
    datastream is purged through the object's API and the pid is printed.

    :param argv: command line arguments (not used)
    """

    # Pass the credentials straight through: a format string without a
    # placeholder raises TypeError as soon as it is applied to a value.
    repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)
    risearch = repo.risearch
    query = 'select ?pid where {?pid <fedora-view:disseminates> ?ds . ?pid <fedora-model:hasModel> <info:fedora/islandora:pageCModel> . ?ds <fedora-view:disseminationType> <info:fedora/*/PDF>}'

    pids = risearch.find_statements(query, language='sparql', type='tuples', flush=None)

    for dictionary in pids:
        # every value in a result row is an object uri
        for p in dictionary.values():
            pid = p.replace('info:fedora/', '')

            obj = repo.get_object(pid)
            # the datastream is purged directly by id; fetching a
            # datastream object first is not needed for that
            obj.api.purgeDatastream(pid, "PDF")
            obj.save()

            print("Purged PDF for %s" % pid)
    def _load_postcard(self, label, description, subjects, filename):
        '''Create a postcard object and load to fedora.

        The saved object is appended to ``self.postcards``.

        :param label: object label and dc:title
        :param description: object dc:description
        :param subjects: list of subjects to be set in dc:subject
        :param filename: filename for image content, assumed relative to current directory
        '''
        # NOTE: image object init here somewhat redundant with current postcard ingest logic
        repo = Repository()
        obj = repo.get_object(type=ImageObject)
        obj.label = label
        obj.owner = settings.FEDORA_OBJECT_OWNERID
        obj.dc.content.title = obj.label
        # NOTE(review): extend() adds one entry per element, so a plain
        # string here would be split into characters -- confirm callers
        # pass a list of descriptions.
        obj.dc.content.description_list.extend(description)
        obj.dc.content.subject_list.extend(subjects)
        # common DC for all postcards
        obj.dc.content.type = 'image'
        # FIXME: configure this somewhere?
        obj.dc.content.relation_list.extend([settings.RELATION,
                                 'http://beck.library.emory.edu/greatwar/'])
        # add relation to postcard collection
        obj.rels_ext.content.add((
                    URIRef(obj.uri),
                    URIRef(MEMBER_OF_COLLECTION),
                    URIRef(PostcardCollection.get().uri)
            ))
        # set file as content of image datastream: image data is binary, so
        # open in 'rb'; the object is saved inside the block because the
        # file must still be open when its content is read, and the block
        # closes the handle afterwards instead of leaking it
        with open(path.join(fixture_path, filename), 'rb') as image_file:
            obj.image.content = image_file
            obj.save()
        self.postcards.append(obj)
Example #22
0
    def get(self, request):
        """Answer an unAPI request.

        Query parameters read from the request:

        - no ``id``: list the formats available for all items
        - ``id`` only: list the formats available for that item
        - ``id`` and ``format``: return the item in that format, or an
          empty 406 response when the item does not offer the format
        """
        context = {}
        item_id = request.GET.get('id', None)
        fmt = request.GET.get('format', None)
        if item_id is not None:
            context['id'] = item_id
            repo = Repository(request=self.request)
            # generalized class-based view would need probably a get-item method
            # for repo objects, could use type-inferring repo variant
            obj = repo.get_object(item_id, type=Volume)

            formats = obj.unapi_formats

            if fmt is None:
                # display formats for this item
                context['formats'] = formats
            else:
                try:
                    current_format = formats[fmt]
                except KeyError:
                    # the format comes straight from the query string; an
                    # unknown value is a client error (unAPI uses 406 Not
                    # Acceptable for it), not a server crash
                    return HttpResponse(status=406)
                # return requested format for this item
                meth = getattr(obj, current_format['method'])
                return HttpResponse(meth(),
                                    content_type=current_format['type'])

        else:
            # display formats for all items
            # NOTE: if multiple classes, should be able to combine the formats
            context['formats'] = Volume.unapi_formats

        # NOTE: doesn't really even need to be a template, could be generated
        # with eulxml just as easily if that simplifies reuse
        return render(request,
                      'books/unapi_format.xml',
                      context,
                      content_type='application/xml')
Example #23
0
class VolumePageList(ListView, VaryOnCookieMixin):
    '''Display a paginated list of :class:`~readux.books.models.Page`
    objects associated with a single :class:`~readux.books.models.Volume`.
    Pages are displayed by thumbnail; thumbnails include an annotation count
    indicator for logged in users with annotations.
    '''

    template_name = 'books/volume_pages_list.html'
    paginate_by = 30
    context_object_name = 'pages'

    @method_decorator(last_modified(view_helpers.volume_pages_modified))
    def dispatch(self, *args, **kwargs):
        return super(VolumePageList, self).dispatch(*args, **kwargs)

    def get_queryset(self):
        '''Look up the volume named by the ``pid`` url keyword and return
        its Solr pages; raises :class:`Http404` when the object does not
        exist or is not a volume.'''
        self.repo = Repository(request=self.request)
        # store the volume for use in get_context_data
        self.vol = self.repo.get_object(self.kwargs['pid'], type=Volume)
        if not self.vol.exists or not self.vol.is_a_volume:
            raise Http404
        return self.vol.find_solr_pages()

    def get_context_data(self, **kwargs):
        '''Extend the list context with the volume, a search form,
        per-page annotation counts for the current user, and a layout
        hint based on the dimensions of the first page.'''
        # pass keyword arguments through so the parent class sees
        # anything the caller supplied
        context_data = super(VolumePageList, self).get_context_data(**kwargs)

        context_data.update({
            'vol': self.vol,
            'form': BookSearch(),  # form for searching in this book
        })

        # if user is authenticated, check for annotations on this volume
        if self.request.user.is_authenticated():
            notes = self.vol.page_annotation_count(self.request.user)
            # method returns a dict for easy lookup;
            # strip out base site url for easy lookup in the template
            # (need leading / left to match item urls)
            domain = get_current_site(self.request).domain.rstrip('/')
            if not domain.startswith('https'):
                domain = 'https://' + domain

            # .items() rather than .iteritems() so this works on
            # both Python 2 and Python 3
            annotated_pages = dict((uri.replace(domain, ''), count)
                                   for uri, count in notes.items())
        else:
            annotated_pages = {}
        context_data.update({
            'annotated_pages': annotated_pages,
            'annotation_search_enabled': bool(annotated_pages)
        })

        # Check if the first page of the volume is wider than it is tall
        # to set the layout of the pages
        first_page = self.vol.pages[0]
        if first_page.width > first_page.height:
            layout = 'landscape'
        else:
            layout = 'default'
        context_data['layout'] = layout

        return context_data
Example #24
0
 def get_object(self, queryset=None):
     '''Return the :class:`Volume` for the ``pid`` url keyword argument.

     Raises :class:`Http404` when the object does not exist or is not
     a volume.
     '''
     # kwargs are set based on configured url pattern
     requested_pid = self.kwargs['pid']
     volume = Repository(request=self.request).get_object(requested_pid,
                                                          type=Volume)
     if volume.exists and volume.is_a_volume:
         return volume
     raise Http404
Example #25
0
 def get_object(self, queryset=None):
     '''Load the volume identified by the url ``pid``; 404 if the object
     is missing or is not a volume.'''
     # the url pattern supplies the pid as a keyword argument
     volume_pid = self.kwargs['pid']
     repository = Repository(request=self.request)
     volume = repository.get_object(volume_pid, type=Volume)
     is_usable = volume.exists and volume.is_a_volume
     if not is_usable:
         raise Http404
     return volume
Example #26
0
def rdf_profile(request, username):
    '''RDF/XML version of a user profile: describes the author (nick,
    name and, when ESD data is available and not suppressed, hashed
    email and phone) and their ten most recent articles.'''

    # same user & publication lookup as the human-readable profile
    user, userprofile = _get_profile_user(username)
    articles = userprofile.recent_articles(limit=10)

    # graph setup: register namespace prefixes, then build the URIs
    graph = RdfGraph()
    for prefix, namespace in ns_prefixes.iteritems():
        graph.bind(prefix, namespace)
    author_node = BNode()
    profile_url = reverse('accounts:profile', kwargs={'username': username})
    profile_uri = URIRef(request.build_absolute_uri(profile_url))
    profile_data_url = reverse('accounts:profile-data',
                               kwargs={'username': username})
    profile_data_uri = URIRef(request.build_absolute_uri(profile_data_url))

    # author information
    for triple in (
            (profile_uri, FOAF.primaryTopic, author_node),
            (author_node, RDF.type, FOAF.Person),
            (author_node, FOAF.nick, Literal(user.username)),
            (author_node, FOAF.publications, profile_uri)):
        graph.add(triple)

    # directory data is optional; fall back to the account's full name
    try:
        esd_data = userprofile.esd_data()
    except EsdPerson.DoesNotExist:
        esd_data = None

    if esd_data:
        display_name = esd_data.directory_name
    else:
        display_name = user.get_full_name()
    graph.add((author_node, FOAF.name, Literal(display_name)))

    if esd_data and not userprofile.suppress_esd_data:
        email_digest = hashlib.sha1(esd_data.email).hexdigest()
        graph.add((author_node, FOAF.mbox_sha1sum, Literal(email_digest)))
        if esd_data.phone:
            graph.add((author_node, FOAF.phone,
                       URIRef('tel:' + esd_data.phone)))

    # TODO: use ESD profile data where appropriate
    # (and honor internet/directory suppressed, suppression override)

    # article information
    repo = Repository(request=request)
    for record in articles:
        publication = repo.get_object(record['pid'], type=Publication)
        # blank node, because the info:fedora/ uri is not public
        publication_node = BNode()

        # relate to author
        graph.add((author_node, FRBR.creatorOf, publication_node))
        graph.add((author_node, FOAF.made, publication_node))
        # add object rdf
        graph += publication.as_rdf(node=publication_node)

    response = HttpResponse(graph.serialize(),
                            content_type='application/rdf+xml')
    response['Content-Location'] = profile_data_uri
    return response
Example #27
0
 def get_object(self, queryset=None):
     '''Return the :class:`Collection` for the ``pid`` url keyword
     argument, raising :class:`Http404` if the pid does not exist or the
     object lacks the required content models.'''
     # kwargs are set based on configured url pattern
     requested_pid = self.kwargs['pid']
     collection = Repository(request=self.request).get_object(
         requested_pid, type=Collection)
     if collection.exists and collection.has_requisite_content_models:
         return collection
     # pid doesn't exist or isn't a collection
     raise Http404
Example #28
0
def file(request, pid):
    '''Serve the file datastream of a :class:`FileObject` as an
    attachment, using the basename of the object's DC title as the
    download filename.'''
    dsid = FileObject.file.id
    file_obj = Repository().get_object(pid, type=FileObject)
    download_name = os.path.basename(file_obj.dc.content.title)
    return raw_datastream(
        request, pid, dsid, type=FileObject,
        headers={
            'Content-Disposition': "attachment; filename=%s" % download_name,
        })
Example #29
0
def rdfxml(request, aggId):
    '''Serve the rdfxml datastream of an :class:`AggregationObject` as
    an attachment, using the basename of the object's DC title as the
    download filename.'''
    dsid = AggregationObject.rdfxml.id
    aggregation = Repository().get_object(aggId, type=AggregationObject)
    download_name = os.path.basename(aggregation.dc.content.title)
    return raw_datastream(
        request, aggId, dsid, type=AggregationObject,
        headers={
            'Content-Disposition': "attachment; filename=%s" % download_name,
        })
Example #30
0
def datastream_lastmodified(request, pid, dsid, type):
    '''Return the ``created`` value of datastream ``dsid`` on object
    ``pid``, or None if the datastream is missing or the repository
    request fails.'''
    repository = Repository()
    try:
        datastream = repository.get_object(pid, type=type) \
                               .getDatastreamObject(dsid)
        if not (datastream and datastream.exists):
            return None
        return datastream.created
    except RequestFailed:
        # repository errors are deliberately ignored
        return None
Example #31
0
def init_by_cmodel(pid, request=None):
    '''Given a pid, initialize the appropriate type of digital object
    class based on the content models asserted on the object.  Falls
    back to :class:`FileObject` when no configured class matches.'''
    repo = Repository(request=request)
    generic_obj = repo.get_object(pid)
    # list of content models on the object
    cmodels = list(repo.risearch.get_objects(generic_obj.uri,
                                             rdfns.model.hasModel))
    # first class for which every content model is present, if any
    candidates = (objtype for objtype in digital_object_classes
                  if all(cm in cmodels for cm in objtype.CONTENT_MODELS))
    obj_class = next(candidates, None)
    if obj_class is None:
        # fallback
        obj_class = FileObject

    return repo.get_object(pid, type=obj_class)
Example #32
0
def datastream_lastmodified(request, pid, dsid, type):
    '''Look up when datastream ``dsid`` of object ``pid`` was created.

    Returns None when the datastream is unavailable or the repository
    raises :class:`RequestFailed`.
    '''
    created = None
    repository = Repository()
    try:
        digital_obj = repository.get_object(pid, type=type)
        datastream = digital_obj.getDatastreamObject(dsid)
        if datastream and datastream.exists:
            created = datastream.created
    except RequestFailed:
        # best effort only: swallow repository errors
        pass
    return created
Example #33
0
def init_by_cmodel(pid, request=None):
    '''Return the object for ``pid`` initialized as the first class in
    ``digital_object_classes`` whose content models are all present on
    the object, or as a :class:`FileObject` if none match.'''
    repo = Repository(request=request)
    # get a list of content models on the object
    object_uri = repo.get_object(pid).uri
    cmodels = list(repo.risearch.get_objects(object_uri,
                                             rdfns.model.hasModel))

    def has_all_models(objtype):
        # every content model for the digital object class is present
        return all(cm in cmodels for cm in objtype.CONTENT_MODELS)

    selected = FileObject   # fallback when nothing matches
    for objtype in digital_object_classes:
        if has_all_models(objtype):
            selected = objtype
            break

    return repo.get_object(pid, type=selected)
Example #34
0
 def get_object(self, queryset=None):
     '''Load the collection identified by the url ``pid``; 404 if the
     pid doesn't exist or isn't a collection.'''
     # the url pattern supplies the pid as a keyword argument
     collection_pid = self.kwargs['pid']
     repository = Repository(request=self.request)
     collection = repository.get_object(collection_pid, type=Collection)
     is_collection = (collection.exists
                      and collection.has_requisite_content_models)
     if not is_collection:
         raise Http404
     return collection
Example #35
0
def view_postcard_large(request, pid):
    '''Render the large-image page for a single postcard (image and
    title only); responds with 404 when the repository request fails.'''
    repository = Repository()
    try:
        postcard = repository.get_object(pid, type=ImageObject)
        # touch the label so a failing request surfaces here (as a 404)
        # rather than inside the template
        postcard.label
        template_context = {'card': postcard}
        return render(request, 'postcards/view_postcard_large.html',
                      template_context)
    except RequestFailed:
        raise Http404
Example #36
0
def display_experiment(request, expId):
    '''Display an experiment object together with links to the objects
    that are part of it.

    Related objects (those with an ``isPartOf`` relation to the
    experiment) are split into "processes" (pid contains ``process``) and
    everything else, and rendered as HTML links.  If any aggregation
    object ``used`` the experiment, a download link and the RDFa content
    for the last one found are included as well.

    :param request: the current request (not otherwise used here)
    :param expId: pid of the experiment object
    '''
    repo = Repository()
    exp_obj = repo.get_object(expId, type=FileObject)
    exp_pids = repo.risearch.get_subjects("info:fedora/fedora-system:def/relations-external#isPartOf", "info:fedora/" + expId)

    def object_link(o):
        # html link to the display page for a single related object
        return "<a href='http://localhost:8000/repo/objects/" + o.dc.content.identifier + "'>" + o.dc.content.title + "</a>"

    # Generate related object links for display
    related_objects = [repo.get_object(obj_pid) for obj_pid in exp_pids]
    # substring test instead of string.find(), which was removed from the
    # string module in Python 3
    processes = [obj for obj in related_objects if "process" in obj.pid]
    objects = [obj for obj in related_objects if "process" not in obj.pid]

    related_process_links = [object_link(o) for o in processes]
    related_object_links = [object_link(o) for o in objects]

    # Add link for downloading aggregation RDF/XML
    agg_pids = repo.risearch.get_subjects("http://purl.org/net/opmv/ns#used", "info:fedora/" + expId)
    aggregations = [repo.get_object(pid=agg_pid, type=AggregationObject)
                    for agg_pid in agg_pids]

    if not aggregations:
        agg = ""
        rdfa = ""
    else:
        # only the last aggregation found is linked (this was previously
        # implicit, via the leftover loop variable)
        last_agg = aggregations[-1]
        agg = "<a href='http://localhost:8000/repo/aggregations/" + last_agg.dc.content.identifier + "/rdfxml'>RDF/XML</a>"
        rdfa = last_agg.rdfa.content

    return render_to_response('repo/display.html', {'obj': exp_obj, 'agg': agg, 'rdfa': rdfa, 'processes': related_process_links, 'objects': related_object_links})
Example #37
0
 def get_object(self, queryset=None):
     '''Return the :class:`Volume` for the ``pid`` url keyword argument.

     Raises :class:`Http404` if the object doesn't exist, isn't a
     volume, or doesn't have tei.
     '''
     # kwargs are set based on configured url pattern
     requested_pid = self.kwargs['pid']
     volume = Repository(request=self.request).get_object(requested_pid,
                                                          type=Volume)
     if volume.exists and volume.is_a_volume and volume.has_tei:
         # NOTE: not currently an error if volume doesn't have any
         # annotations, but export is probably not meaningful
         return volume
     raise Http404
Example #38
0
 def get_object(self, queryset=None):
     '''Load the volume identified by the url ``pid``; 404 unless the
     object exists, is a volume, and has tei.'''
     # the url pattern supplies the pid as a keyword argument
     volume_pid = self.kwargs['pid']
     repository = Repository(request=self.request)
     volume = repository.get_object(volume_pid, type=Volume)
     exportable = volume.exists and volume.is_a_volume and volume.has_tei
     if not exportable:
         raise Http404
     # NOTE: not currently an error if volume doesn't have any
     # annotations, but export is probably not meaningful
     return volume
Example #39
0
 def members(self):
     '''Generator over the Fedora objects that are related to this
     collection via isMemberOfCollection.'''
     # FIXME: loses repo permissions/credentials here...
     repository = Repository()
     member_pids = repository.risearch.get_subjects(
         relsext.isMemberOfCollection, self.uri)
     # TODO: should we restrict to accessible objects only?
     # (requires passing correct credentials through...)
     for member_pid in member_pids:
         # for now, each member is a generic DigitalObject instance
         yield repository.get_object(member_pid)
Example #40
0
def view_collection(request, pid):
    '''Display the existing
    :class:`~genrepo.collection.models.CollectionObject` identified by
    pid; 404 if it does not exist.
    '''
    collection = Repository(request=request).get_object(
        pid, type=CollectionObject)
    if collection.exists:
        return render(request, 'collection/view.html', {'obj': collection})
    # the object does not exist, or the current user doesn't have
    # permission to see that it exists
    raise Http404
Example #41
0
 def setUp(self):
     '''Create and save a repository object with text and image content
     for the view tests to load.'''
     # load test object to test views with
     repo = Repository()
     self.obj = repo.get_object(type=SimpleDigitalObject)
     self.obj.dc.content.title = 'test object for generic views'
     self.obj.text.content = 'sample plain-text content'
     img_file = os.path.join(settings.FEDORA_FIXTURES_DIR, 'test.png')
     # PNG is binary data: open in binary mode so the bytes are passed
     # through unmodified (text mode can corrupt or fail to decode them)
     self.obj.image.content = open(img_file, mode='rb')
     # force datastream checksums so we can test response headers
     for ds in [self.obj.dc, self.obj.rels_ext, self.obj.text, self.obj.image]:
         ds.checksum_type = 'MD5'
     self.obj.save()
Example #42
0
 def members(self):
     '''Yield each Fedora object in the repository that is related to
     the current collection via isMemberOfCollection.'''
     # FIXME: loses repo permissions/credentials here...
     repo = Repository()
     # TODO: should we restrict to accessible objects only?
     # (requires passing correct credentials through...)
     for member_pid in repo.risearch.get_subjects(
             relsext.isMemberOfCollection, self.uri):
         # for now, just returning as generic DigitalObject instances
         member = repo.get_object(member_pid)
         yield member
Example #43
0
def view_collection(request, pid):
    """Render the view page for the
    :class:`~genrepo.collection.models.CollectionObject` identified by
    pid, or raise :class:`Http404` if it does not exist.
    """
    repository = Repository(request=request)
    collection = repository.get_object(pid, type=CollectionObject)
    if collection.exists:
        context = {"obj": collection}
        return render(request, "collection/view.html", context)
    # missing object, or the current user doesn't have permission to
    # see that it exists
    raise Http404
Example #44
0
 def setUp(self):
     '''Create and save a repository object with text and image content
     for the view tests to load.'''
     # load test object to test views with
     self.obj = Repository().get_object(type=SimpleDigitalObject)
     self.obj.dc.content.title = 'test object for generic views'
     self.obj.text.content = 'sample plain-text content'
     fixture_path = os.path.join(settings.FEDORA_FIXTURES_DIR, 'test.png')
     self.obj.image.content = open(fixture_path, mode='rb')
     # force datastream checksums so we can test response headers
     datastreams = (self.obj.dc, self.obj.rels_ext,
                    self.obj.text, self.obj.image)
     for datastream in datastreams:
         datastream.checksum_type = 'MD5'
     self.obj.save()
Example #45
0
    def test_index_data(self):
        '''Exercise the index_data view: IP restriction, JSON response,
        basic-auth credentials handed to the repository, and 404 for an
        unknown pid.'''
        # create a test object for testing index data view
        testobj = Repository().get_object(type=SimpleObject)
        testobj.label = 'test object'
        testobj.owner = 'tester'
        testobj.save()
        self.pids.append(testobj.pid)

        # test with request IP not allowed to access the service
        with override_settings(EUL_INDEXER_ALLOWED_IPS=['0.13.23.134']):
            status = index_data(self.request, testobj.pid).status_code
            self.assertEqual(
                403, status,
                'Expected %s but returned %s for index_data view with request IP not in configured list'
                % (403, status))

        # test with request IP allowed to hit the service
        with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
            response = index_data(self.request, testobj.pid)
            status = response.status_code
            self.assertEqual(
                200, status,
                'Expected %s but returned %s for index_data view'
                % (200, status))
            mimetype = response['Content-Type']
            self.assertEqual(
                'application/json', mimetype,
                'Expected %s but returned %s for mimetype on index_data view'
                % ('application/json', mimetype))
            self.assertEqual(
                testobj.index_data(),
                json.loads(response.content.decode('utf-8')),
                'Response content loaded from JSON should be equal to object indexdata'
            )

            # test with basic auth
            testuser, testpass = '******', 'testpass'
            credentials = force_bytes('%s:%s' % (testuser, testpass))
            token = base64.b64encode(credentials)
            auth_header = 'Basic %s' % force_text(token)
            self.request.META['HTTP_AUTHORIZATION'] = auth_header
            with patch('eulfedora.indexdata.views.TypeInferringRepository'
                       ) as typerepo:
                mock_obj = typerepo.return_value.get_object.return_value
                mock_obj.index_data.return_value = {}
                index_data(self.request, testobj.pid)
                typerepo.assert_called_with(username=testuser,
                                            password=testpass)

            # non-existent pid should generate a 404
            self.assertRaises(Http404, index_data, self.request,
                              'bogus:testpid')
Example #46
0
class ModelUtilsTest(TestCase):
    # tests for utility methods declared in file.models

    repo_admin = None

    def setUp(self):
        # instantiate repo_admin the first time we run, after the test settings are in place
        if self.repo_admin is None:
            self.repo_admin = Repository(username=getattr(settings, 'FEDORA_TEST_USER', None),
                                         password=getattr(settings, 'FEDORA_TEST_PASSWORD', None))
        # pids of objects created by a test, purged again in tearDown
        self.pids = []

    def tearDown(self):
        for pid in self.pids:
            self.repo_admin.purge_object(pid)

    def test_object_type_from_mimetype(self):
        self.assertEqual(ImageObject, object_type_from_mimetype('image/jpeg'))
        self.assertEqual(ImageObject, object_type_from_mimetype('image/gif'))
        self.assertEqual(FileObject, object_type_from_mimetype('image/unsupported-img'))
        self.assertEqual(FileObject, object_type_from_mimetype('text/plain'))

    def test_init_by_cmodel(self):
        # create file and image objects to test initialization
        fileobj = self.repo_admin.get_object(type=FileObject)
        fileobj.save()
        imgobj = self.repo_admin.get_object(type=ImageObject)
        imgobj.save()
        self.pids.extend([fileobj.pid, imgobj.pid])
        # NOTE: assertIsInstance/assertNotIsInstance replace the
        # deprecated assert_ alias and report the actual type on failure
        # init a new object from file pid - should be a file object
        initobj = init_by_cmodel(fileobj.pid)
        self.assertIsInstance(initobj, FileObject)
        # since ImageObject extends FileObject, confirm that we didn't get the wrong thing
        self.assertNotIsInstance(initobj, ImageObject)
        # image pid should be returned as an ImageObject
        initobj = init_by_cmodel(imgobj.pid)
        self.assertIsInstance(initobj, ImageObject)
Example #47
0
def object_tags(request, pid):
    '''Set & display private tags on a particular
    :class:`~eulfedora.models.DigitalObject` (saved in the database by
    way of :class:`~openemory.accounts.models.Bookmark`).

    On an HTTP GET, returns a JSON list of the tags for the specified
    object, or 404 if the object has not been tagged.

    On an HTTP PUT, will replace any existing tags with tags from the
    body of the request.  Uses :meth:`taggit.utils.parse_tags` to
    parse tags, with the same logic :mod:`taggit` uses for parsing
    keyword and phrase tags on forms.  After a successful PUT, returns
    a JSON response with a list of the updated tags (status 201 if a
    new bookmark was created).  If the Fedora object does not exist,
    returns a 404 error.

    Any other HTTP method gets a 405 (Method Not Allowed) response.
    '''

    # only GET and PUT are handled below; reject anything else
    # explicitly instead of failing later on an unbound ``bookmark``
    if request.method not in ('GET', 'PUT'):
        not_allowed = HttpResponse(status=405)
        not_allowed['Allow'] = 'GET, PUT'
        return not_allowed

    # bookmark options that will be used to create a new or find an
    # existing bookmark for either GET or PUT
    bkmark_opts = {'user': request.user, 'pid': pid}

    status_code = 200  # if all goes well, unless creating a new bookmark

    if request.method == 'PUT':
        # don't allow tagging non-existent objects
        # NOTE: this will 404 if a bookmark is created and an object
        # subsequently is removed or otherwise becomes unavailable in
        # the repository
        repo = Repository(request=request)
        obj = repo.get_object(pid)
        # if this fedora API call becomes expensive, may want to
        # consider querying Solr instead
        if not obj.exists:
            raise Http404

        bookmark, created = Bookmark.objects.get_or_create(**bkmark_opts)
        if created:
            status_code = 201
        bookmark.tags.set(*parse_tags(request.read()))
        # fall through and display the newly-updated tags
    else:
        # GET: the bookmark must already exist
        bookmark = get_object_or_404(Bookmark, **bkmark_opts)

    # GET or successful PUT
    tags = [tag.name for tag in bookmark.tags.all()]
    return HttpResponse(json_serializer.encode(tags), status=status_code,
                        content_type='application/json')
Example #48
0
def object_tags(request, pid):
    '''Set & display private tags on a particular
    :class:`~eulfedora.models.DigitalObject` (saved in the database by
    way of :class:`~openemory.accounts.models.Bookmark`).

    On an HTTP GET, returns a JSON list of the tags for the specified
    object, or 404 if the object has not been tagged.

    On an HTTP PUT, will replace any existing tags with tags from the
    body of the request.  Uses :meth:`taggit.utils.parse_tags` to
    parse tags, with the same logic :mod:`taggit` uses for parsing
    keyword and phrase tags on forms.  After a successful PUT, returns
    a JSON response with a list of the updated tags (status 201 if a
    new bookmark was created).  If the Fedora object does not exist,
    returns a 404 error.

    Any other HTTP method gets a 405 (Method Not Allowed) response.
    '''

    # only GET and PUT are handled below; reject anything else
    # explicitly instead of failing later on an unbound ``bookmark``
    if request.method not in ('GET', 'PUT'):
        not_allowed = HttpResponse(status=405)
        not_allowed['Allow'] = 'GET, PUT'
        return not_allowed

    # bookmark options that will be used to create a new or find an
    # existing bookmark for either GET or PUT
    bkmark_opts = {'user': request.user, 'pid': pid}

    status_code = 200  # if all goes well, unless creating a new bookmark

    if request.method == 'PUT':
        # don't allow tagging non-existent objects
        # NOTE: this will 404 if a bookmark is created and an object
        # subsequently is removed or otherwise becomes unavailable in
        # the repository
        repo = Repository(request=request)
        obj = repo.get_object(pid)
        # if this fedora API call becomes expensive, may want to
        # consider querying Solr instead
        if not obj.exists:
            raise Http404

        bookmark, created = Bookmark.objects.get_or_create(**bkmark_opts)
        if created:
            status_code = 201
        bookmark.tags.set(*parse_tags(request.read()))
        # fall through and display the newly-updated tags
    else:
        # GET: the bookmark must already exist
        bookmark = get_object_or_404(Bookmark, **bkmark_opts)

    # GET or successful PUT
    tags = [tag.name for tag in bookmark.tags.all()]
    # content_type rather than the ``mimetype`` keyword, which newer
    # Django releases no longer accept
    return HttpResponse(json_serializer.encode(tags),
                        status=status_code,
                        content_type='application/json')
Example #49
0
def curl_download_file(pid, dsid):
    '''Download the content of datastream ``dsid`` on object ``pid`` with
    pycurl into a named temporary file (left on disk), showing a
    progress bar and printing the HTTP status and elapsed time.

    The temporary file and the curl handle are always closed, even if
    the transfer fails, so the downloaded data is flushed to disk.
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    tmpfile = tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                          delete=False)
    try:
        print('writing to ', tmpfile.name)

        widgets = [
            'Download: ',
            progressbar.widgets.Percentage(), ' ',
            progressbar.widgets.Bar(), ' ',
            progressbar.widgets.ETA(), ' ',
            progressbar.widgets.FileTransferSpeed()
        ]
        # set initial progressbar size based on file; will be slightly larger because
        # of multipart boundary content
        pbar = progressbar.ProgressBar(widgets=widgets,
                                       max_value=ds.size).start()

        def progress(dl_total, dl, up_total, up):
            # update current status
            pbar.update(dl)

        c = pycurl.Curl()
        try:
            auth = base64.b64encode(
                force_bytes("%s:%s" % (testsettings.FEDORA_USER,
                                       testsettings.FEDORA_PASSWORD)))
            headers = {'Authorization': 'Basic %s' % force_text(auth)}
            c.setopt(pycurl.VERBOSE, 1)
            c.setopt(pycurl.HTTPHEADER,
                     ["%s: %s" % t for t in headers.items()])

            # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
            c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' % \
                (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
            # c.setopt(c.WRITEDATA, buffer)
            c.setopt(c.WRITEFUNCTION, tmpfile.write)
            c.setopt(c.XFERINFOFUNCTION, progress)
            c.setopt(c.NOPROGRESS, False)
            c.perform()

            # HTTP response code, e.g. 200.
            print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
            # Elapsed time for the transfer.
            print('Time: %f' % c.getinfo(c.TOTAL_TIME))
        finally:
            # release the curl handle even when the transfer fails
            c.close()
    finally:
        # flush buffered data and release the file handle; the file
        # itself stays on disk because it was created with delete=False
        tmpfile.close()
Example #50
0
    def cascade_updated_articles(self):
        '''Reindex all articles associated with faculty who have been
        updated (either article-indexed person data has changed or
        a previously-indexed faculty member is no longer in ESD).
        '''
        # collect pids in a set so an article shared by several updated
        # faculty members is only reindexed once
        updated_articles = set()
        for username in self.updated_faculty:
            for article in self.articles_by_faculty(username):
                updated_articles.add(article['pid'])

        repo = Repository()
        for pid in updated_articles:
            if self.verbosity >= self.v_all:
                # single-argument print() gives the same output under
                # Python 2 and Python 3
                print('Indexing article %s' % pid)
            article = repo.get_object(pid, type=Article)
            self.solr.add(article.index_data())
    def cascade_updated_articles(self):
        '''Reindex all articles associated with faculty who have been
        updated (either article-indexed person data has changed or
        a previously-indexed faculty member is no longer in ESD).
        '''
        # collect pids in a set so an article shared by several updated
        # faculty members is only reindexed once
        updated_articles = set()
        for username in self.updated_faculty:
            for article in self.articles_by_faculty(username):
                updated_articles.add(article['pid'])

        repo = Repository()
        for pid in updated_articles:
            if self.verbosity >= self.v_all:
                # single-argument print() gives the same output under
                # Python 2 and Python 3
                print('Indexing article %s' % pid)
            article = repo.get_object(pid, type=Publication)
            self.solr.add(article.index_data())
Example #52
0
    def test_set_oai_id(self):
        '''Setting, reading, deleting, and setting-to-None the ``oai_id``
        of a file object is reflected in its serialized RELS-EXT.'''
        repo = Repository()
        fileobj = repo.get_object(type=FileObject)
        oai_id = 'oai:ark:/25593/123'
        # NOTE: assertIn/assertNotIn replace the deprecated assert_ alias
        # and show the serialized content when a check fails
        # set
        fileobj.oai_id = oai_id
        self.assertIn('<oai:itemID>%s</oai:itemID>' % oai_id,
                      fileobj.rels_ext.content.serialize())
        # get
        self.assertEqual(oai_id, fileobj.oai_id)
        # del
        del fileobj.oai_id
        self.assertNotIn('<oai:itemID>', fileobj.rels_ext.content.serialize())

        # set None - should be equivalent to delete
        fileobj.oai_id = None
        self.assertNotIn('<oai:itemID>', fileobj.rels_ext.content.serialize())
Example #53
0
def raw_audit_trail(request, pid, type=None, repo=None):
    '''View to display the raw xml audit trail for a Fedora Object.
    Returns an :class:`~django.http.HttpResponse` with the response content
    populated with the content of the audit trail.

    If the object is not found or does not have an audit trail, raises
    an :class:`~django.http.Http404` .  For any other errors (e.g.,
    permission denied by Fedora), the exception is not caught and
    should be handled elsewhere.

    :param request: HttpRequest
    :param pid: Fedora object PID
    :param type: digital object class to initialize the object as, to
        restrict the view to a certain type of object (optional)
    :param repo: :class:`~eulcore.django.fedora.server.Repository` instance to use,
        in case your application requires custom repository initialization (optional)


    .. Note::

      Fedora does not make checksums, size, or other attributes
      available for the audit trail (since it is internal and not a
      true datastream), so the additional headers included in
      :meth:`raw_datastream` cannot be added here.

    '''

    if repo is None:
        repo = Repository()
    # no special options are *needed* to access audit trail, since it
    # is available on any DigitalObject; but a particular view may be
    # restricted to a certain type of object
    get_obj_opts = {}
    if type is not None:
        get_obj_opts['type'] = type
    obj = repo.get_object(pid, **get_obj_opts)

    # 404 unless the object exists and has a non-empty audit trail
    if not (obj.exists and obj.has_requisite_content_models
            and obj.audit_trail):
        raise Http404

    # content_type rather than the ``mimetype`` keyword, which newer
    # Django releases no longer accept
    response = HttpResponse(obj.audit_trail.serialize(),
                            content_type='text/xml')
    # audit trail is updated every time the object gets modified
    response['Last-Modified'] = obj.modified
    return response
Example #54
0
def datastream_etag(request, pid, dsid, type=None, repo=None, **kwargs):
    '''Etag function for use with
    :class:`django.views.decorators.http.condition`; accepts the same
    arguments as :meth:`~eulfedora.views.raw_datastream`.

    Returns the datastream checksum, or None when the datastream does
    not exist, checksums are disabled, or the repository request fails.
    '''
    etag = None
    try:
        repository = Repository() if repo is None else repo
        # only pass a type through when one was requested
        lookup_opts = {} if type is None else {'type': type}
        digital_obj = repository.get_object(pid, **lookup_opts)
        datastream = digital_obj.getDatastreamObject(dsid)
        if datastream and datastream.exists \
                and datastream.checksum_type != 'DISABLED':
            etag = datastream.checksum
    except RequestFailed:
        # best effort only: no etag if the repository request fails
        pass

    return etag
Example #55
0
    def handle(self, *args, **options):
        '''Export an annotated website zip for each requested volume.

        For every pid in ``options['pid']``, TEI is loaded from the file
        named by ``options['tei']`` when given, and otherwise generated
        from the volume and its annotations.  The exported zip is copied
        to ``<noid>-annotated-site.zip`` in the current directory.
        Export errors are re-raised as :class:`CommandError`.
        '''
        repo = Repository()
        for pid in options['pid']:
            vol = repo.get_object(pid, type=Volume)
            if options['tei']:
                tei = load_xmlobject_from_file(options['tei'], Facsimile)
            else:
                tei = annotate.annotated_tei(vol.generate_volume_tei(),
                                             vol.annotations())
            try:
                # local is not called ``zipfile`` so it cannot be mistaken
                # for (or shadow) the standard library module
                site_zip = export.website(vol, tei)
            except export.ExportException as err:
                raise CommandError(err)

            zipfilename = '%s-annotated-site.zip' % vol.noid
            shutil.copyfile(site_zip.name, zipfilename)

            # single-argument print() gives the same output under
            # Python 2 and Python 3
            print('Export for %s complete, zipfile is %s' % (vol.noid,
                                                             zipfilename))
Example #56
0
def page_modified(request, vol_pid, pid):
    '''Last modification time for a single page: combines the newest Solr
    timestamp for the page with, for logged-in users, the time their
    visible annotations on the page were last updated.'''
    # TODO: use volume pid in query
    page_query = solr_interface().query(
        content_model=PageV1_0.PAGE_CONTENT_MODEL, pid=pid)
    results = page_query.sort_by('-timestamp').field_limit('timestamp')

    # if user is logged in, page should show as modified
    # when annotations have changed
    if request.user.is_authenticated():
        # last update for annotations on this page, if any
        page = Repository().get_object(pid, type=Page)
        visible_notes = page.annotations().visible_to(request.user)
        latest_note = visible_notes.last_updated_time()
    else:
        latest_note = None

    if results.count():
        solrtime = results[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)