Example #1
0
class PdfToTextTest(unittest.TestCase):
    """Check that ``pdf_to_text`` extracts the expected text from a PDF,
    both from a local file and from a PDF datastream stored in Fedora."""

    fixture_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'fixtures')
    pdf_filepath = os.path.join(fixture_dir, 'test.pdf')
    # text the fixture PDF is expected to yield
    pdf_text = 'This is a short PDF document to use for testing.'

    def setUp(self):
        """Ingest the fixture PDF into the repository as a new test object."""
        self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                               settings.FEDORA_PASSWORD)
        # PDF is binary content: read it in binary mode so the bytes are not
        # altered by newline translation or text decoding
        with open(self.pdf_filepath, 'rb') as pdf:
            self.pdfobj = self.repo.get_object(type=TestPdfObject)
            self.pdfobj.label = 'eulindexer test pdf object'
            self.pdfobj.pdf.content = pdf
            # save while the file is still open so its content can be read
            self.pdfobj.save()

    def tearDown(self):
        """Purge the object created in ``setUp``."""
        self.repo.purge_object(self.pdfobj.pid)

    def test_file(self):
        # extract text from a pdf from a file on the local filesystem
        with open(self.pdf_filepath, 'rb') as pdf:
            text = pdf_to_text(pdf)
        self.assertEqual(self.pdf_text, text)

    def test_object_datastream(self):
        # extract text from a pdf datastream in fedora
        pdfobj = self.repo.get_object(self.pdfobj.pid, type=TestPdfObject)
        text = pdf_to_text(pdfobj.pdf.content)
        self.assertEqual(self.pdf_text, text)
class Command(BaseCommand):
    '''Add every article object to the collection aliased as ``oe-collection``
    and record its ARK URI as a Dublin Core identifier.
    '''
    args = "[netid netid ...]"
    # the class docstring doubles as the command's help text
    help = __doc__

    option_list = BaseCommand.option_list + (
        make_option('--noact', '-n',
                    action='store_true',
                    default=False,
                    help='Report which articles would be updated without saving any changes'),
        )

    def handle(self, *args, **options):
        '''Page through all article objects and update each one.

        For every existing article the collection is set and an ARK URI
        built from ``settings.PIDMAN_HOST`` and the pid is appended to the
        DC identifiers.  Errors on a single page or article are reported
        and processing continues with the next one.
        '''
        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1
        noact = options.get('noact', False)

        #connection to repository
        self.repo = Repository(settings.FEDORA_ROOT, username=settings.FEDORA_MANAGEMENT_USER, password=settings.FEDORA_PASSWORD)
        pid_set = self.repo.get_objects_with_cmodel(Publication.ARTICLE_CONTENT_MODEL, type=Publication)
        coll = self.repo.get_object(pid=settings.PID_ALIASES['oe-collection'])
        try:
            articles = Paginator(pid_set, 100)
        except Exception as e:
            self.output(0, "Error paginating items: : %s " % (e,))
            # nothing can be processed without a paginator; stop here rather
            # than failing below on an undefined name
            return

        #process all Articles
        for p in articles.page_range:
            try:
                objs = articles.page(p).object_list
            except Exception as e:
                #print error and go to next iteration of loop
                self.output(0, "Error getting page: %s : %s " % (p, e))
                continue
            for article in objs:
                try:
                    if not article.exists:
                        self.output(0, "Skipping %s because pid does not exist" % article.pid)
                        continue

                    if noact:
                        self.output(self.v_normal,
                                    "Would update %s (no changes saved)" % article.pid)
                        continue

                    article.collection = coll
                    # ARK uses the portion of the pid after the pidspace prefix
                    ark_uri = '%sark:/25593/%s' % (settings.PIDMAN_HOST, article.pid.split(':')[1])
                    article.dc.content.identifier_list.extend([ark_uri])
                    article.save()
                    self.output(2, "Updated %s" % article.pid)

                except Exception as e:
                    self.output(0, "Error processing pid: %s : %s " % (article.pid, e))

    def output(self, v, msg):
        '''simple function to handle logging output based on verbosity'''
        if self.verbosity >= v:
            self.stdout.write("%s\n" % msg)
Example #3
0
 def get_queryset(self):
     """Look up the requested volume and return its Solr page results.

     Raises ``Http404`` when the object does not exist or is not a volume.
     """
     self.repo = Repository(request=self.request)
     volume = self.repo.get_object(self.kwargs['pid'], type=Volume)
     # keep the volume on the view for use in get_context_data
     self.vol = volume
     if not (volume.exists and volume.is_a_volume):
         raise Http404
     return volume.find_solr_pages()
Example #4
0
def main(argv):
    """Add a ``dime:IsCopyOf`` relationship to collection members lacking one.

    Queries the resource index for members of ``dimenovels:fame`` without an
    ``IsCopyOf`` statement, looks up each object's series number in Solr,
    maps that number to an edition URI via a local CSV file, and saves the
    relationship on the object.  A failure on one pid is reported and the
    remaining pids are still processed.

    :param argv: command line arguments (not used)
    """
    s = solr.SolrConnection('%s/solr' % HOST)
    # pass the credentials as-is; formatting them into a string that has no
    # placeholder raises TypeError
    repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)
    results = repo.risearch.sparql_query('PREFIX dime: <http://dimenovels.org/ontology#> select ?pid where {?pid <fedora-rels-ext:isMemberOfCollection> <info:fedora/dimenovels:fame> . OPTIONAL { ?pid dime:IsCopyOf ?copy } FILTER (! BOUND(?copy)) }')
    pids = [v.replace('info:fedora/', '')
            for row in results
            for _, v in row.items()]

    # first CSV column is the series number, second the edition URI
    with open('C:/Users/a1691506/Desktop/ffw_editions.csv', mode='r') as infile:
        editionDict = {rows[0]: rows[1] for rows in csv.reader(infile)}

    for p in pids:

        print("Processing %s" % p)

        try:
            response = s.query('PID:"%s"' % p)

            # reset for every pid so a pid without a Solr hit cannot silently
            # reuse the number found for the previous pid; the lookup below
            # then fails and the pid is reported
            number = None
            for hit in response.results:
                number = hit['mods_series_number_ms'][0].split(' ')[1]

            editionUri = editionDict[number]

            obj = repo.get_object(p)
            obj.add_relationship('http://dimenovels.org/ontology#IsCopyOf', editionUri)
            obj.save()
        except Exception:
            # best effort: report and move on (Ctrl-C still interrupts)
            print("%s failed. Check it!" % p)
            continue
Example #5
0
    def process(self, input):
        """
        Save the input image array to a datastream on a Fedora object.

        Encodes ``input`` as an image in the configured ``format`` and sets
        it as the content of the ``DATASTREAM`` datastream of the object
        named by the ``pid`` parameter.  Returns ``input`` unchanged, or
        ``None`` when ``input`` is ``None``.

        Raises ``exceptions.NodeError`` when saving the image to the buffer
        fails with ``IOError``.
        """
        if input is None:
            return
        #if not os.environ.get("NODETREE_WRITE_FILEOUT"):
        #    return input

        params = self._params
        image_format = params.get("format")
        repo = Repository(params.get("url"), params.get("username"),
                params.get("password"))
        try:
            buf = StringIO()
            Image.fromarray(input).save(buf, image_format.upper())
        except IOError:
            raise exceptions.NodeError(
                    "Error obtaining image buffer in format: %s" %
                        image_format.upper(), self)
        # writing leaves the buffer positioned at its end; rewind so that a
        # consumer reading from the current position gets the whole image
        buf.seek(0)

        pclass = get_fedora_proxy_class(params.get("dsid"))
        obj = repo.get_object(params.get("pid"), type=pclass)
        obj.DATASTREAM.content = buf
        obj.DATASTREAM.label = "Test Ingest Datastream 1"
        obj.DATASTREAM.mimetype = "image/%s" % image_format
        obj.save()
        return input
Example #6
0
    def get(self, request):
        """Handle an unAPI-style request.

        * no ``id``: render the formats available for all items
        * ``id`` only: render the formats available for that item
        * ``id`` and ``format``: return the item in the requested format,
          or a 406 response when the item does not offer that format
        """
        context = {}
        item_id = request.GET.get('id', None)
        fmt = request.GET.get('format', None)
        if item_id is not None:
            context['id'] = item_id
            repo = Repository(request=self.request)
            # generalized class-based view would need probably a get-item method
            # for repo objects, could use type-inferring repo variant
            obj = repo.get_object(item_id, type=Volume)

            formats = obj.unapi_formats

            if fmt is None:
                # display formats for this item
                context['formats'] = formats
            elif fmt not in formats:
                # requested format is not offered for this item: answer
                # 406 Not Acceptable instead of failing on the lookup
                return HttpResponse(status=406)
            else:
                current_format = formats[fmt]
                # return requested format for this item
                meth = getattr(obj, current_format['method'])
                return HttpResponse(meth(), content_type=current_format['type'])

        else:
            # display formats for all items
            # NOTE: if multiple classes, should be able to combine the formats
            context['formats'] = Volume.unapi_formats

        # NOTE: doesn't really even need to be a template, could be generated
        # with eulxml just as easily if that simplifies reuse
        return render(request, 'books/unapi_format.xml', context,
            content_type='application/xml')
Example #7
0
def volume_modified(request, pid):
    'last modification time for a single volume'
    # The Solr index timestamp is used rather than the object's own last
    # modified date: a changed index may mean the object was modified, and
    # the volume's index timestamp is updated when pages are added.
    solr = solr_interface()
    volume_query = solr.query(content_model=VolumeV1_0.VOLUME_CONTENT_MODEL,
                              pid=pid)
    results = volume_query.sort_by('-timestamp').field_limit('timestamp')

    # For a logged-in user the page should also show as modified when the
    # annotation count changes.
    latest_note = None
    if request.user.is_authenticated():
        # NOTE: initializing the volume should not be expensive; it is only
        # used to get the volume uri and the associated annotations
        vol = Repository().get_object(pid, type=Volume)
        visible_notes = vol.annotations().visible_to(request.user)
        # newest annotation creation for pages in this volume
        latest_note = visible_notes.last_created_time()

    if results.count():
        solrtime = results[0]['timestamp']
    else:
        solrtime = None
    return solrtimestamp_or_datetime(solrtime, latest_note)
Example #8
0
def volume_modified(request, pid):
    'last modification time for a single volume'
    # NOTE: the solr indexing timestamp stands in for the object's last
    # modified date, since an index change may reflect a modification and
    # the volume's index timestamp is updated when pages are added
    index_query = solr_interface().query(
        content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid)
    results = index_query.sort_by('-timestamp').field_limit('timestamp')

    # logged-in users: a change in annotation count should also make the
    # page show as modified
    latest_note = None
    if request.user.is_authenticated():
        # NOTE: the volume is only needed for its uri and associated
        # annotations, so initializing it here should be cheap
        repo = Repository()
        volume = repo.get_object(pid, type=Volume)
        annotations = volume.annotations()
        # newest annotation creation for pages in this volume
        latest_note = annotations.visible_to(request.user).last_created_time()

    solrtime = None
    if results.count():
        solrtime = results[0]['timestamp']
    return solrtimestamp_or_datetime(solrtime, latest_note)
Example #9
0
    def remove_test_objects(self):
        """Purge any leftover test objects before or after running tests.

        Finds objects whose pid matches ``<FEDORA_PIDSPACE>:*`` and purges
        them one by one; a failed purge is logged and the rest are still
        attempted.  A summary is written to stderr when anything was removed.
        """
        # NOTE: This method expects to be called only when FEDORA_PIDSPACE has been
        # switched to a test pidspace

        # use test fedora credentials if they are set
        repo = Repository(root=getattr(settings, 'FEDORA_TEST_ROOT', None),
                          username=getattr(settings, 'FEDORA_TEST_USER', None),
                          password=getattr(settings, 'FEDORA_TEST_PASSWORD',
                                           None))
        test_objects = repo.find_objects(pid__contains='%s:*' %
                                         settings.FEDORA_PIDSPACE)
        count = 0
        for obj in test_objects:
            # if objects are unexpectedly not being cleaned up, pid/label may help
            # to isolate which test is creating the leftover objects
            try:
                repo.purge_object(obj.pid, "removing test object")
            except RequestFailed:
                logger.warning('Error purging test object %s', obj.pid)
                continue
            # NOTE: not displaying label because we may not have permission to access it
            logger.info('Purged test object %s', obj.pid)
            count += 1
        if count:
            sys.stderr.write("Removed %s test object(s) with pidspace %s\n"
                             % (count, settings.FEDORA_PIDSPACE))
Example #10
0
def purge_item(item_id):
    """Purge every repository object returned by a ``pid__contains`` search
    for ``pitt:<item_id>``, printing each pid once it has been purged."""
    repo = Repository()
    matches = repo.find_objects(pid__contains='pitt:%s' % (item_id,))
    for match in matches:
        repo.purge_object(match.pid)
        print('%s purged' % (match.pid,))
Example #11
0
def download_file(pid, dsid):
    """Download a datastream's content to a temporary file.

    Content is read in chunks while a progress bar is updated; the temporary
    file is created with ``delete=False`` so it stays on disk, and its name
    is printed.

    :param pid: pid of the object that holds the datastream
    :param dsid: id of the datastream to download
    """
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL, testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    widgets = ['Download: ', progressbar.widgets.Percentage(), ' ',
               progressbar.widgets.Bar(), ' ', progressbar.widgets.ETA(),
               ' ', progressbar.widgets.FileTransferSpeed()]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    # download content to a tempfile
    tmpfile = tempfile.NamedTemporaryFile(
        prefix='%s-%s_' % (pid, dsid), delete=False)
    print('writing to ', tmpfile.name)
    size_read = 0
    try:
        for chunk in ds.get_chunked_content():
            size_read += len(chunk)
            pbar.update(size_read)
            tmpfile.write(chunk)
    finally:
        # always flush and release the handle, even when the download fails;
        # errors still propagate to the caller
        tmpfile.close()
    pbar.finish()
Example #12
0
def datastream_etag(request, pid, dsid, type=None, repo=None, accept_range_request=False, **kwargs):
    """Method suitable for use as an etag function with
    :class:`django.views.decorators.http.condition`.  Takes the same
    arguments as :meth:`~eulfedora.views.raw_datastream`.

    Returns the datastream checksum, or ``None`` when no etag applies:
    a partial range request, a missing datastream, a disabled checksum,
    or a failed repository request.
    """

    # a range request that is not for the entire file must *NOT* get an etag
    if accept_range_request:
        requested_range = request.META.get("HTTP_RANGE", None)
        if requested_range and requested_range != "bytes=1-":
            return None

    try:
        repository = Repository() if repo is None else repo
        # only pass a type when the caller asked for one
        lookup_opts = {} if type is None else {"type": type}
        datastream = repository.get_object(pid, **lookup_opts) \
                               .getDatastreamObject(dsid)
        if datastream and datastream.exists \
           and datastream.checksum_type != "DISABLED":
            return datastream.checksum
    except RequestFailed:
        # a repository error just means there is no etag to offer
        pass

    return None
Example #13
0
    def setUp(self):
        """Create the test client and ingest two file objects (a text file
        and an image) whose urls and pids the tests use."""
        # instantiate repo_admin the first time we run, after the test settings are in place
        if self.repo_admin is None:
            self.repo_admin = Repository(username=getattr(settings, 'FEDORA_TEST_USER', None),
                                         password=getattr(settings, 'FEDORA_TEST_PASSWORD', None))

        self.client = Client()

        # create a file object to edit
        # binary mode: the content must be ingested byte-for-byte so that it
        # matches the checksum supplied below
        with open(self.ingest_fname, 'rb') as ingest_f:
            self.obj = self.repo_admin.get_object(type=FileObject)
            self.obj.dc.content.title = self.obj.label = 'Test file object'
            self.obj.dc.content.date = '2011'
            self.obj.master.content = ingest_f
            self.obj.master.label = 'hello-world.txt'
            self.obj.master.checksum = self.ingest_md5sum
            self.obj.save()
        self.edit_url = reverse('file:edit', kwargs={'pid': self.obj.pid})
        self.download_url = reverse('file:download', kwargs={'pid': self.obj.pid})
        self.view_url = reverse('file:view', kwargs={'pid': self.obj.pid})

        # create a image object for testing
        # image data is binary; text mode could corrupt it
        with open(self.image_fname, 'rb') as ingest_f:
            self.imgobj = self.repo_admin.get_object(type=FileObject)
            self.imgobj.dc.content.title = self.imgobj.label = 'Test file object'
            self.imgobj.master.content = ingest_f
            self.imgobj.master.label = 'test.jpg'
            self.imgobj.master.checksum = self.image_md5sum
            self.imgobj.save()

        self.pids = [self.obj.pid, self.imgobj.pid]
0
def postcard_image(request, pid, size):
    '''Redirect to the postcard image in the requested size.

    Raises :class:`Http404` when the object does not exist, the size is
    not recognized, or the repository request fails.

    :param pid: postcard object pid
    :param size: size to return, one of thumbnail, medium, or large
    '''

    # NOTE: formerly this served out actual image content, via
    # fedora dissemination & djatoka
    # Images now use an IIIF image server; adding redirects here
    # for the benefit of search engines or indexes referencing
    # the old urls
    try:
        repo = Repository()
        obj = repo.get_object(pid, type=ImageObject)
        if not obj.exists:
            raise Http404

        if size == 'thumbnail':
            url = obj.thumbnail_url
        elif size == 'medium':
            url = obj.medium_img_url
        elif size == 'large':
            url = obj.large_img_url
        else:
            # unrecognized size: there is no url to redirect to
            raise Http404

        return HttpResponsePermanentRedirect(url)

    except RequestFailed:
        raise Http404
Example #15
0
class PdfToTextTest(unittest.TestCase):
    """Check that ``pdf_to_text`` extracts the expected text from a PDF,
    both from a local file and from a PDF datastream stored in Fedora."""

    fixture_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'fixtures')
    pdf_filepath = os.path.join(fixture_dir, 'test.pdf')
    # text the fixture PDF is expected to yield
    pdf_text = 'This is a short PDF document to use for testing.'

    def setUp(self):
        """Ingest the fixture PDF into the repository as a new test object."""
        self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                               settings.FEDORA_PASSWORD)
        # PDF is binary content: read it in binary mode so the bytes are not
        # altered by newline translation or text decoding
        with open(self.pdf_filepath, 'rb') as pdf:
            self.pdfobj = self.repo.get_object(type=TestPdfObject)
            self.pdfobj.label = 'eulindexer test pdf object'
            self.pdfobj.pdf.content = pdf
            # save while the file is still open so its content can be read
            self.pdfobj.save()

    def tearDown(self):
        """Purge the object created in ``setUp``."""
        self.repo.purge_object(self.pdfobj.pid)

    def test_file(self):
        # extract text from a pdf from a file on the local filesystem
        with open(self.pdf_filepath, 'rb') as pdf:
            text = pdf_to_text(pdf)
        self.assertEqual(self.pdf_text, text)

    def test_object_datastream(self):
        # extract text from a pdf datastream in fedora
        pdfobj = self.repo.get_object(self.pdfobj.pid, type=TestPdfObject)
        text = pdf_to_text(pdfobj.pdf.content)
        self.assertEqual(self.pdf_text, text)
Example #16
0
    def handle(self, *pids, **options):
        """Update page ARKs that still resolve to the old page url.

        Searches the pid manager for ARKs targeting the old ``/books/pages/``
        url, looks each page up in the repository and, when it exists, sets
        the ARK target to the page's absolute url.  Counts of updated,
        missing and failed pages are kept in ``self.stats`` and summarized
        on stderr at the end.
        """
        # bind a handler for interrupt signal
        signal.signal(signal.SIGINT, self.interrupt_handler)

        verbosity = int(options.get('verbosity', self.v_normal))

        repo = Repository()
        try:
            pidman = DjangoPidmanRestClient()
        except Exception as err:
            # error if pid manager config options not in localsettings
            raise CommandError(err)

        old_page_target = '%s/books/pages/' % Site.objects.get_current().domain
        search_args = {'type':'ark', 'target': old_page_target, 'count': 10}
        # a small first result set is enough to learn the total
        total = pidman.search_pids(**search_args)['results_count']
        # the actual processing then uses a larger page size
        search_args['count'] = 100
        if verbosity >= self.v_normal:
            print('Found %d total page ARKs with targets to be updated' % total)

        progress_widgets = [Percentage(), ' (', Counter(), ')', Bar(), ETA()]
        pbar = ProgressBar(widgets=progress_widgets, maxval=total).start()

        self.stats = defaultdict(int)
        self.processed = set()
        for ark in self.get_search_results(pidman, search_args):
            self.processed.add(ark['pid'])
            # the fedora pid is the last path segment of the target uri
            target_uri = ark['targets'][0]['target_uri']
            _, pid = target_uri.rstrip('/').rsplit('/', 1)
            try:
                page = repo.get_object(pid, type=Page)
                if page.exists:
                    # check if volume exists?
                    pidman.update_ark_target(ark['pid'], target_uri=page.absolute_url)
                    self.stats['updated'] += 1
                else:
                    # this should probably only happen in dev/qa
                    if verbosity > self.v_normal:
                        self.stderr.write('Page %s does not exist' % pid)
                    self.stats['notfound'] += 1
            except RequestFailed as rf:
                print('Error accessing %s: %s' % (pid, rf))
                self.stats['error'] += 1

            pbar.update(len(self.processed))
            if self.interrupted:
                break

        if not self.interrupted:
            pbar.finish()
        # summarize
        summary = 'Updated %(updated)d, %(error)d error(s), %(notfound)d not found'
        self.stderr.write(summary % self.stats)
Example #17
0
def datastream_etag(request,
                    pid,
                    dsid,
                    type=None,
                    repo=None,
                    accept_range_request=False,
                    **kwargs):
    '''Method suitable for use as an etag function with
    :class:`django.views.decorators.http.condition`.  Takes the same
    arguments as :meth:`~eulfedora.views.raw_datastream`.

    Returns the datastream checksum, or ``None`` when no etag applies:
    a partial range request, a missing datastream, a disabled checksum,
    or a failed repository request.
    '''

    # do *NOT* return an etag when a range is requested and it is not for
    # the entire file
    http_range = request.META.get('HTTP_RANGE', None) \
        if accept_range_request else None
    if http_range and http_range != 'bytes=1-':
        return None

    try:
        if repo is None:
            repo = Repository()
        # only pass a type when the caller asked for one
        get_obj_opts = {'type': type} if type is not None else {}
        ds = repo.get_object(pid, **get_obj_opts).getDatastreamObject(dsid)
        has_checksum = ds and ds.exists and ds.checksum_type != 'DISABLED'
        if has_checksum:
            return ds.checksum
    except RequestFailed:
        # a repository error just means there is no etag to offer
        pass

    return None
def main(argv):
    """Populate the FULL_TEXT datastream of every Philologic object.

    For each object with the ``niu-objects:cmodel`` content model, the
    ``OBJ`` datastream content is passed through ``strip_tags`` and the
    result is saved as the ``FULL_TEXT`` datastream.

    :param argv: command line arguments (not used)
    """
    # Fedora connection
    repo = Repository(root='http://localhost:8080/fedora/', username='******', password='******')

    # objects are found through their content model
    philologic_objects = repo.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel')

    # datastream properties, applied in this order before the content
    full_text_properties = (
        ('label', 'Full text'),
        ('mimetype', 'text/plain'),
        ('versionable', True),
        ('state', 'A'),
        ('checksum_type', 'MD5'),
    )

    for phil_obj in philologic_objects:

        print('Processing %s' % phil_obj)

        # text is the OBJ content with the markup stripped
        plain_text = strip_tags(phil_obj.getDatastreamObject('OBJ').content)

        # fill in FULL_TEXT and store it
        full_text = phil_obj.getDatastreamObject('FULL_TEXT')
        for prop_name, prop_value in full_text_properties:
            setattr(full_text, prop_name, prop_value)
        full_text.content = plain_text
        full_text.save()
Example #19
0
class ThesisBase(unittest.TestCase):
    """Base class for testing the functionality of the ETD Django
     application."""

    def __init__(self, *args, **kwargs):
        unittest.TestCase.__init__(self, *args, **kwargs)
        # pids of fixtures ingested by a test; purged again in tearDown
        self.fedora_fixtures_ingested = []
        self.pidspace = FEDORA_PIDSPACE

    def setUp(self):
        """Creates a base class instance of an `eulfedora` Repository
        for testing the basic functionality of the ingesting
        a thesis object into a Fedora Repository."""
        self.repo = Repository()
        # alternative with explicit connection settings:
        #   Repository(FEDORA_ROOT, FEDORA_USER, FEDORA_PASSWORD)
        self.repo.risearch.RISEARCH_FLUSH_ON_QUERY = True

    def tearDown(self):
        """Removes test objects from the repository"""
        for pid in self.fedora_fixtures_ingested:
            try:
                self.repo.purge_object(pid)
            except RequestFailed as rf:
                # best effort: keep purging the remaining fixtures
                logger.warning('Error purging test object %s in tear down:%s',
                               pid, rf)
Example #20
0
    def process(self, input):
        """
        Save the input image array to a datastream on a Fedora object.

        Encodes ``input`` as an image in the configured ``format`` and sets
        it as the content of the ``DATASTREAM`` datastream of the object
        named by the ``pid`` parameter.  Returns ``input`` unchanged, or
        ``None`` when ``input`` is ``None``.

        Raises ``exceptions.NodeError`` when saving the image to the buffer
        fails with ``IOError``.
        """
        if input is None:
            return
        #if not os.environ.get("NODETREE_WRITE_FILEOUT"):
        #    return input

        params = self._params
        image_format = params.get("format")
        repo = Repository(params.get("url"),
                          params.get("username"),
                          params.get("password"))
        try:
            buf = StringIO()
            Image.fromarray(input).save(buf, image_format.upper())
        except IOError:
            raise exceptions.NodeError(
                "Error obtaining image buffer in format: %s" %
                image_format.upper(), self)
        # writing leaves the buffer positioned at its end; rewind so that a
        # consumer reading from the current position gets the whole image
        buf.seek(0)

        pclass = get_fedora_proxy_class(params.get("dsid"))
        obj = repo.get_object(params.get("pid"), type=pclass)
        obj.DATASTREAM.content = buf
        obj.DATASTREAM.label = "Test Ingest Datastream 1"
        obj.DATASTREAM.mimetype = "image/%s" % image_format
        obj.save()
        return input
    def _load_postcard(self, label, description, subjects, filename):
        '''Create a postcard object and load to fedora.

        The saved object is appended to ``self.postcards``.

        :param label: object label and dc:title
        :param description: object dc:description
        :param subjects: list of subjects to be set in dc:subject
        :param filename: filename for image content, assumed relative to current directory
        '''
        # NOTE: image object init here somewhat redundant with current postcard ingest logic
        repo = Repository()
        obj = repo.get_object(type=ImageObject)
        obj.label = label
        obj.owner = settings.FEDORA_OBJECT_OWNERID
        obj.dc.content.title = obj.label
        obj.dc.content.description_list.extend(description)
        obj.dc.content.subject_list.extend(subjects)
        # common DC for all postcards
        obj.dc.content.type = 'image'
        # FIXME: configure this somewhere?
        obj.dc.content.relation_list.extend([settings.RELATION,
                                 'http://beck.library.emory.edu/greatwar/'])
        # add relation to postcard collection
        obj.rels_ext.content.add((
                    URIRef(obj.uri),
                    URIRef(MEMBER_OF_COLLECTION),
                    URIRef(PostcardCollection.get().uri)
            ))
        # set file as content of image datastream; image data is binary, and
        # the file is kept open only until the object has been saved
        with open(path.join(fixture_path, filename), 'rb') as image_file:
            obj.image.content = image_file
            obj.save()
        self.postcards.append(obj)
Example #22
0
def browse(request):
    "Browse postcards and display thumbnail images."
    repo = Repository()
    repo.default_object_type = ImageObject
    per_page = 15
    context = {}

    # optionally narrow the standard postcard search by subject
    search_opts = postcard_search_opts().copy()
    if 'subject' in request.GET:
        subject = request.GET['subject']
        context['subject'] = subject
        search_opts['subject'] = subject

    found = repo.find_objects(**search_opts)
    paginator = Paginator(list(found), per_page)

    # a page number that is not an integer falls back to the first page
    try:
        page_number = int(request.GET.get('page', '1'))
    except ValueError:
        page_number = 1

    # If page request (9999) is out of range, deliver last page of results.
    try:
        current_page = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        current_page = paginator.page(paginator.num_pages)

    context['postcards_paginated'] = current_page
    return render(request, 'postcards/browse.html', context)
Example #23
0
def download_file(pid, dsid):
    """Download a datastream's content to a temporary file.

    Content is read in chunks while a progress bar is updated; the temporary
    file is created with ``delete=False`` so it stays on disk, and its name
    is printed.

    :param pid: pid of the object that holds the datastream
    :param dsid: id of the datastream to download
    """
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    widgets = [
        'Download: ',
        progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.Bar(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.FileTransferSpeed()
    ]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    # download content to a tempfile
    tmpfile = tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                          delete=False)
    print('writing to ', tmpfile.name)
    size_read = 0
    try:
        for chunk in ds.get_chunked_content():
            size_read += len(chunk)
            pbar.update(size_read)
            tmpfile.write(chunk)
    finally:
        # always flush and release the handle, even when the download fails;
        # errors still propagate to the caller
        tmpfile.close()
    pbar.finish()
def main(argv):
    """Report missing-figure markers found in Philologic objects.

    For every object with the ``niu-objects:cmodel`` content model whose
    ``OBJ`` datastream mentions ``ARTFL-figure-missing``, the ``sysid`` of
    each matching ``span`` is collected.  Two CSV files are written to the
    current directory:

    * ``phil_doc.csv``: object, number of images, ``;``-joined image ids
    * ``phil_image.csv``: image id, number of times it was seen

    :param argv: command line arguments (not used)
    """
    # pass the credentials as-is; formatting them into a string that has no
    # placeholder raises TypeError
    repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)

    philologic_pids = repo.get_objects_with_cmodel(cmodel_uri='info:fedora/niu-objects:cmodel')

    # the authenticated opener does not depend on the object being
    # processed, so build it once
    passwordManager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    fedoraAdmin = "%s/fedora" % HOST
    passwordManager.add_password(None, fedoraAdmin, fedoraUser, fedoraPass)
    handler = urllib2.HTTPBasicAuthHandler(passwordManager)
    fedoraOpener = urllib2.build_opener(handler)

    substring = 'ARTFL-figure-missing'
    # image id -> number of occurrences across all processed objects
    d = defaultdict(int)

    # context manager closes the report even if processing fails part-way
    with open('phil_doc.csv', 'w') as phil_doc:
        for p in philologic_pids:

            philologic = p.getDatastreamObject('OBJ').content
            if substring not in philologic:
                continue

            print('Processing %s' % p)

            url = '%s/fedora/objects/%s/datastreams/OBJ/content' % (HOST, p)
            soup = BeautifulSoup(fedoraOpener.open(url), 'html.parser')

            images = [span['sysid']
                      for span in soup.find_all('span', 'ARTFL-figure-missing')]
            for image in images:
                d[image] += 1

            phil_doc.write('%s,%s,%s\n' % (p, len(images), ';'.join(images)))

            print('Successfully processed %s' % p)

    with open('phil_image.csv', 'w') as outfile:
        phil_image = csv.writer(outfile)
        for key, value in d.items():
            phil_image.writerow([key, value])
Example #25
0
    def get(self, request):
        """Handle an unAPI-style request.

        * no ``id``: render the formats available for all items
        * ``id`` only: render the formats available for that item
        * ``id`` and ``format``: return the item in the requested format,
          or a 406 response when the item does not offer that format
        """
        context = {}
        item_id = request.GET.get('id', None)
        fmt = request.GET.get('format', None)
        if item_id is not None:
            context['id'] = item_id
            repo = Repository(request=self.request)
            # generalized class-based view would need probably a get-item method
            # for repo objects, could use type-inferring repo variant
            obj = repo.get_object(item_id, type=Volume)

            formats = obj.unapi_formats

            if fmt is None:
                # display formats for this item
                context['formats'] = formats
            elif fmt not in formats:
                # requested format is not offered for this item: answer
                # 406 Not Acceptable instead of failing on the lookup
                return HttpResponse(status=406)
            else:
                current_format = formats[fmt]
                # return requested format for this item
                meth = getattr(obj, current_format['method'])
                return HttpResponse(meth(),
                                    content_type=current_format['type'])

        else:
            # display formats for all items
            # NOTE: if multiple classes, should be able to combine the formats
            context['formats'] = Volume.unapi_formats

        # NOTE: doesn't really even need to be a template, could be generated
        # with eulxml just as easily if that simplifies reuse
        return render(request,
                      'books/unapi_format.xml',
                      context,
                      content_type='application/xml')
Example #26
0
class DatastreamXml():
    """Helper for reading and replacing XML datastreams of one repository
    object, identified by pid."""

    def __init__(self, pid, repocls=None, server="Development"):
        """Bind to the object ``pid`` and fetch it.

        :param pid: pid of the object to work on
        :param repocls: repository connection to use; when ``None`` a new
            ``Repository`` is built from the configuration for ``server``
        :param server: configuration name passed to ``repo.Get_Configs``
        """
        self.repo = repocls
        # check the connection that was passed in, not the module-level name
        if self.repo is None:
            # NOTE(review): `repo` is a name defined outside this class
            # (presumably a configuration helper module) -- confirm
            username, password, root = repo.Get_Configs(server)
            self.repo = Repository(root=root,
                                   username=username,
                                   password=password)
        self.pid = pid
        self.GetObject()

    def GetObject(self):
        """Fetch the object for ``self.pid`` into ``self.digital_object``."""
        self.digital_object = self.repo.get_object(self.pid)

    def ReplaceDs(self, dsid, xml_path):
        """Replace datastream ``dsid`` with the XML loaded from ``xml_path``.

        The datastream label is the pid without its colon, an underscore,
        and the datastream id.
        """
        self.dsid = dsid
        self.xml_path = xml_path
        xml_object = self._MakeXmlObject()
        digital_object = self.repo.get_object(self.pid)
        # NOTE(review): confirm that DatastreamObject can be constructed
        # without a label argument in the library version in use
        datastream = DatastreamObject(digital_object, self.dsid)
        datastream.content = xml_object
        # str.join takes a single iterable of the parts
        datastream.label = "_".join([self.pid.replace(":", ""), dsid])
        datastream.save()

    def _MakeXmlObject(self):
        """Load ``self.xml_path`` as an XML object."""
        return xmlmap.load_xmlobject_from_file(self.xml_path)

    def GetMarcDs(self):
        """Return the ``MARCXML`` datastream parsed as an etree element.

        The datastream object, its serialized content and the parsed tree
        are also kept on the instance.
        """
        self.marcxml_object = self.digital_object.getDatastreamObject(
            "MARCXML")
        self.marcxml_content = self.marcxml_object.content.serialize()
        self.marc_tree = etree.fromstring(self.marcxml_content)
        return self.marc_tree
Example #27
0
def main(argv):
    """Write a CSV report of per-object and cumulative datastream sizes.

    Every object returned by the SPARQL query (ordered by creation date) gets
    one row: pid, creation date, total size of its datastreams, and the
    running total for the repository so far.

    :param argv: command-line arguments (not used)
    """
    # 'wb' is kept from the original (the csv-module convention on Python 2).
    # The context manager guarantees the report is flushed and closed even if
    # a repository call fails part-way through.
    with open("C:/Users/a1691506/Desktop/repo_size.csv", 'wb') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # Credentials are passed straight through: formatting them into a
        # template that has no placeholder raises TypeError.
        repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)
        risearch = repo.risearch
        query = 'select ?pid ?date where {?pid <fedora-model:hasModel> <info:fedora/fedora-system:FedoraObject-3.0> ; <fedora-model:createdDate> ?date . } ORDER BY ASC(?date)'

        pids = risearch.find_statements(query, language='sparql', type='tuples', flush=None)

        repo_size = 0

        for row in pids:
            pid = row['pid'].replace('info:fedora/', '')
            date_created = row['date']

            obj = repo.get_object(pid)
            total_size = sum(obj.getDatastreamObject(dsid).size
                             for dsid in obj.ds_list)
            repo_size += total_size

            print("Total size for %s: %s" % (pid, total_size))

            csvwriter.writerow([pid, date_created, total_size, repo_size])
def main(argv):
    """Purge the PDF datastream from every page object that disseminates one.

    :param argv: command-line arguments (not used)
    """
    # Credentials are passed straight through: formatting them into a
    # template that has no placeholder raises TypeError.
    repo = Repository(root='%s/fedora/' % HOST, username=fedoraUser, password=fedoraPass)
    risearch = repo.risearch
    query = 'select ?pid where {?pid <fedora-view:disseminates> ?ds . ?pid <fedora-model:hasModel> <info:fedora/islandora:pageCModel> . ?ds <fedora-view:disseminationType> <info:fedora/*/PDF>}'

    pids = risearch.find_statements(query, language='sparql', type='tuples', flush=None)

    for row in pids:
        # every value in the result row is treated as an object uri
        # (the query only selects ?pid)
        for key in row:
            pid = row[key].replace('info:fedora/', '')

            obj = repo.get_object(pid)
            obj.api.purgeDatastream(pid, "PDF")
            obj.save()

            print("Purged PDF for %s" % pid)
Example #29
0
 def get_object(self, queryset=None):
     '''Return the :class:`Volume` for the pid in the url, or raise 404.

     ``queryset`` is accepted but not used.
     '''
     # the url pattern supplies the pid through the view kwargs
     requested_pid = self.kwargs['pid']
     volume = Repository(request=self.request).get_object(requested_pid,
                                                          type=Volume)
     if volume.exists and volume.is_a_volume:
         return volume
     raise Http404
Example #30
0
 def setUp(self):
     '''Create a test object in Fedora whose pdf datastream holds the
     fixture PDF; the saved object is kept on ``self.pdfobj``.'''
     self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                            settings.FEDORA_PASSWORD)
     # A PDF is binary data: open it in binary mode so the bytes are
     # uploaded unchanged.
     with open(self.pdf_filepath, 'rb') as pdf:
         self.pdfobj = self.repo.get_object(type=TestPdfObject)
         self.pdfobj.label = 'eulindexer test pdf object'
         self.pdfobj.pdf.content = pdf
         # save while the file is still open so its content can be read
         self.pdfobj.save()
Example #31
0
 def get_object(self, queryset=None):
     '''Look up the requested :class:`Volume`; raise 404 when the object
     does not exist or is not a volume.  ``queryset`` is not used.'''
     # pid is populated from the configured url pattern
     volume_pid = self.kwargs['pid']
     fedora = Repository(request=self.request)
     volume = fedora.get_object(volume_pid, type=Volume)
     if not (volume.exists and volume.is_a_volume):
         raise Http404
     return volume
Example #32
0
def rdf_profile(request, username):
    '''RDF version of the profile information that :meth:`profile`
    presents in human-readable form.

    Returns an ``application/rdf+xml`` response describing the author and
    up to ten recent articles, with ``Content-Location`` set to the
    profile-data uri.
    '''

    # same user & publication lookup as the profile view
    user, userprofile = _get_profile_user(username)
    articles = userprofile.recent_articles(limit=10)

    # graph that will hold author and publication statements
    graph = RdfGraph()
    for prefix, namespace in ns_prefixes.iteritems():
        graph.bind(prefix, namespace)
    author = BNode()
    profile_url = reverse('accounts:profile', kwargs={'username': username})
    profile_uri = URIRef(request.build_absolute_uri(profile_url))
    profile_data_url = reverse('accounts:profile-data',
                               kwargs={'username': username})
    profile_data_uri = URIRef(request.build_absolute_uri(profile_data_url))

    # author information
    author_triples = (
        (profile_uri, FOAF.primaryTopic, author),
        (author, RDF.type, FOAF.Person),
        (author, FOAF.nick, Literal(user.username)),
        (author, FOAF.publications, profile_uri),
    )
    for triple in author_triples:
        graph.add(triple)

    try:
        esd_data = userprofile.esd_data()
    except EsdPerson.DoesNotExist:
        esd_data = None

    # use the directory name when ESD data is available, otherwise the
    # name on the user account
    display_name = esd_data.directory_name if esd_data \
        else user.get_full_name()
    graph.add((author, FOAF.name, Literal(display_name)))

    if esd_data and not userprofile.suppress_esd_data:
        email_digest = hashlib.sha1(esd_data.email).hexdigest()
        graph.add((author, FOAF.mbox_sha1sum, Literal(email_digest)))
        if esd_data.phone:
            graph.add((author, FOAF.phone, URIRef('tel:' + esd_data.phone)))

    # TODO: use ESD profile data where appropriate
    # (and honor internet/directory suppressed, suppression override)

    # article information
    repo = Repository(request=request)
    for record in articles:
        publication = repo.get_object(record['pid'], type=Publication)
        # blank node, since the info:fedora/ uri is not public
        pub_node = BNode()

        # relate the article to its author
        graph.add((author, FRBR.creatorOf, pub_node))
        graph.add((author, FOAF.made, pub_node))
        # merge in the object's own rdf
        graph += publication.as_rdf(node=pub_node)

    response = HttpResponse(graph.serialize(),
                            content_type='application/rdf+xml')
    response['Content-Location'] = profile_data_uri
    return response
Example #33
0
def file(request, pid):
    '''Return the file datastream of a :class:`FileObject` via
    ``raw_datastream``, adding a ``Content-Disposition`` attachment header
    whose filename is the basename of the object's DC title.'''
    dsid = FileObject.file.id
    fileobj = Repository().get_object(pid, type=FileObject)
    download_name = os.path.basename(fileobj.dc.content.title)
    disposition = "attachment; filename=%s" % download_name
    return raw_datastream(request, pid, dsid, type=FileObject,
                          headers={'Content-Disposition': disposition})
Example #34
0
 def all():
     """
     Find every object in the repository with the collection content
     model, returned as
     :class:`~genrepo.collection.models.CollectionObject`
     """
     return Repository().get_objects_with_cmodel(
         CollectionObject.COLLECTION_CONTENT_MODEL, type=CollectionObject)
Example #35
0
def rdfxml(request, aggId):
    '''Return the rdfxml datastream of an :class:`AggregationObject` via
    ``raw_datastream``, adding a ``Content-Disposition`` attachment header
    whose filename is the basename of the object's DC title.'''
    dsid = AggregationObject.rdfxml.id
    aggregation = Repository().get_object(aggId, type=AggregationObject)
    download_name = os.path.basename(aggregation.dc.content.title)
    disposition = "attachment; filename=%s" % download_name
    return raw_datastream(request, aggId, dsid, type=AggregationObject,
                          headers={'Content-Disposition': disposition})
Example #36
0
 def get_object(self, queryset=None):
     '''Return the :class:`Collection` for the pid in the url, or raise
     404.  ``queryset`` is accepted but not used.'''
     # the url pattern supplies the pid through the view kwargs
     requested_pid = self.kwargs['pid']
     collection = Repository(request=self.request).get_object(
         requested_pid, type=Collection)
     # only an existing object with the required content models is served
     if collection.exists and collection.has_requisite_content_models:
         return collection
     raise Http404
Example #37
0
def datastream_lastmodified(request, pid, dsid, type):
    '''Return the ``created`` value of datastream ``dsid`` on object
    ``pid``; return None when the datastream is missing or the repository
    request fails.'''
    repo = Repository()
    try:
        datastream = repo.get_object(pid, type=type).getDatastreamObject(dsid)
        if not datastream or not datastream.exists:
            return None
        return datastream.created
    except RequestFailed:
        # a failed request is treated as "no date available"
        return None
Example #38
0
 def __init__(self, pid, repocls=None, server="Development"):
     """Store the repository connection and load the object for ``pid``.

     :param pid: pid of the Fedora object to work with
     :param repocls: an already-configured repository connection; when
         omitted (or falsy) a new ``Repository`` is built from the
         settings returned by ``repo.Get_Configs(server)``
     :param server: configuration name passed to ``repo.Get_Configs``
     """
     self.repo = repocls
     # Test the attribute that was just assigned.  The original tested the
     # bare name ``repo``, which never reflects the ``repocls`` argument.
     # NOTE(review): ``repo`` below is presumably a configuration module
     # imported at file level -- confirm.
     if not self.repo:
         username, password, root = repo.Get_Configs(server)
         self.repo = Repository(root=root,
                                username=username,
                                password=password)
     self.pid = pid
     self.GetObject()
Example #39
0
 def get_object(self, queryset=None):
     '''Look up the requested :class:`Collection`; raise 404 when the pid
     does not exist or lacks the required content models.  ``queryset``
     is not used.'''
     # pid is populated from the configured url pattern
     collection_pid = self.kwargs['pid']
     fedora = Repository(request=self.request)
     collection = fedora.get_object(collection_pid, type=Collection)
     if not (collection.exists and collection.has_requisite_content_models):
         raise Http404
     return collection
Example #40
0
 def all():
     """
     Query the repository for objects with the collection content model
     and return them as
     :class:`~genrepo.collection.models.CollectionObject`
     """
     fedora = Repository()
     cmodel = CollectionObject.COLLECTION_CONTENT_MODEL
     return fedora.get_objects_with_cmodel(cmodel, type=CollectionObject)
Example #41
0
def datastream_lastmodified(request, pid, dsid, type):
    '''Look up when datastream ``dsid`` of object ``pid`` was created.

    Returns the datastream's ``created`` value, or None when the
    datastream does not exist or the repository request fails.
    '''
    fedora = Repository()
    created = None
    try:
        fedora_obj = fedora.get_object(pid, type=type)
        datastream = fedora_obj.getDatastreamObject(dsid)
        if datastream and datastream.exists:
            created = datastream.created
    except RequestFailed:
        # leave the result as None when the repository cannot be queried
        pass
    return created
Example #42
0
def view_postcard_large(request, pid):
    '''Render the large-image postcard page (image with title only);
    a failed repository request becomes a 404.'''
    repo = Repository()
    try:
        card = repo.get_object(pid, type=ImageObject)
        # touch the label now so a missing object fails here, inside the
        # try block, rather than while the template is rendering
        card.label
        context = {'card': card}
        return render(request, 'postcards/view_postcard_large.html', context)
    except RequestFailed:
        raise Http404
Example #43
0
 def get_object(self, queryset=None):
     '''Return the :class:`Volume` for the pid in the url; raise 404
     unless it exists, is a volume and has TEI.  ``queryset`` is accepted
     but not used.'''
     # the url pattern supplies the pid through the view kwargs
     requested_pid = self.kwargs['pid']
     volume = Repository(request=self.request).get_object(requested_pid,
                                                          type=Volume)
     if volume.exists and volume.is_a_volume and volume.has_tei:
         # NOTE: a volume without annotations is not currently an error,
         # but export is probably not meaningful
         return volume
     raise Http404
Example #44
0
 def get_object(self, queryset=None):
     '''Look up the requested :class:`Volume`; raise 404 when the object
     doesn't exist, isn't a volume, or doesn't have TEI.  ``queryset`` is
     not used.'''
     # pid is populated from the configured url pattern
     volume_pid = self.kwargs['pid']
     fedora = Repository(request=self.request)
     volume = fedora.get_object(volume_pid, type=Volume)
     if not (volume.exists and volume.is_a_volume and volume.has_tei):
         raise Http404
     # NOTE: having no annotations is not currently treated as an error,
     # but export is probably not meaningful in that case
     return volume
Example #45
0
 def members(self):
     '''Generate the Fedora objects that point at this collection with an
     isMemberOfCollection relation.'''
     # FIXME: loses repo permissions/credentials here...
     repo = Repository()
     # for now, members are yielded as generic DigitalObject instances
     for member_pid in repo.risearch.get_subjects(
             relsext.isMemberOfCollection, self.uri):
         # TODO: should we restrict to accessible objects only?
         # (requires passing correct credentials through...)
         yield repo.get_object(member_pid)
Example #46
0
def view_collection(request, pid):
    """Display the
    :class:`~genrepo.collection.models.CollectionObject` with the given
    pid, or raise 404 when it does not exist.
    """
    collection = Repository(request=request).get_object(
        pid, type=CollectionObject)
    # a missing object and one the current user may not see look the same
    # here; both result in a 404
    if collection.exists:
        return render(request, "collection/view.html", {"obj": collection})
    raise Http404
Example #47
0
    def test_init_retries(self):
        '''The ``retries`` option is stored on the repository instance and
        falls back to the class-level value when not given.'''
        connection = ('http://fedo.ra', 'user', 'passwd')

        # nothing passed: instance uses the class default
        default_repo = Repository(*connection)
        self.assertEqual(Repository.retries, default_repo.retries)

        # explicit number
        five_retries = Repository(*connection, retries=5)
        self.assertEqual(5, five_retries.retries)

        # explicit None
        no_retries = Repository(*connection, retries=None)
        self.assertEqual(None, no_retries.retries)
Example #48
0
def view_collection(request, pid):
    '''Render the view page for the
    :class:`~genrepo.collection.models.CollectionObject` identified by
    pid; raise 404 when the object does not exist.
    '''
    fedora = Repository(request=request)
    collection = fedora.get_object(pid, type=CollectionObject)
    # 404 both when the object is missing and when the current user is not
    # permitted to see that it exists
    if collection.exists:
        context = {'obj': collection}
        return render(request, 'collection/view.html', context)
    raise Http404
Example #49
0
 def setUp(self):
     '''Create and save the test object that the generic views are
     exercised against (DC title, plain-text and image datastreams).'''
     # load test object to test views with
     repo = Repository()
     self.obj = repo.get_object(type=SimpleDigitalObject)
     self.obj.dc.content.title = 'test object for generic views'
     self.obj.text.content = 'sample plain-text content'
     img_file = os.path.join(settings.FEDORA_FIXTURES_DIR, 'test.png')
     # A PNG is binary data: open it in binary mode so the bytes are
     # uploaded unchanged.
     self.obj.image.content = open(img_file, mode='rb')
     # force datastream checksums so we can test response headers
     for ds in [self.obj.dc, self.obj.rels_ext, self.obj.text, self.obj.image]:
         ds.checksum_type = 'MD5'
     self.obj.save()
Example #50
0
 def members(self):
     '''Yield each Fedora object related to this collection via
     isMemberOfCollection.'''
     # FIXME: loses repo permissions/credentials here...
     fedora = Repository()
     member_pids = fedora.risearch.get_subjects(
         relsext.isMemberOfCollection, self.uri)
     # for now, members come back as generic DigitalObject instances
     for member_pid in member_pids:
         # TODO: should we restrict to accessible objects only?
         # (requires passing correct credentials through...)
         yield fedora.get_object(member_pid)
Example #51
0
    def handle(self, *args, **kwargs):
        '''Queue a migration task for each requested disk image, wait for
        the tasks to finish, and print a summary.

        Pids given in ``kwargs['pids']`` are used as-is.  Otherwise every
        existing DiskImage object whose premis format name is ``AFF`` and
        that is not already migrated is selected.
        '''
        verbosity = kwargs.get('verbosity', self.v_normal)

        # pids from the command line win over the repository search
        pids = kwargs.get('pids', [])
        repo = Repository()
        if not pids:
            # nothing requested explicitly: look for AFF disk images
            candidates = repo.get_objects_with_cmodel(
                DiskImage.DISKIMAGE_CONTENT_MODEL, type=DiskImage)
            for diskimage in candidates:
                # risearch results *should* exist; skip any that don't
                # (discrepancies are hopefully only seen in QA)
                if not diskimage.exists:
                    self.stderr.write(self.style.WARNING('%s does not exist' % diskimage.pid))
                    continue

                # the format is recorded in premis; nothing to check
                # without a provenance datastream
                if not diskimage.provenance.exists:
                    continue

                premis = diskimage.provenance.content
                unmigrated_aff = (premis.object and premis.object.format
                                  and premis.object.format.name == 'AFF'
                                  and not diskimage.migrated)
                if unmigrated_aff:
                    pids.append(diskimage.pid)

        # one celery task per pid, collected in a result set
        migration_tasks = celery.result.ResultSet([])
        for pid in pids:
            migration_tasks.add(migrate_aff_diskimage.delay(pid))

        # block until every task has finished
        while migration_tasks.waiting():
            try:
                migration_tasks.join()
            except Exception:
                # task exceptions are propagated by join; ignore them here
                # and report per-task success/failure below
                pass

        completed = migration_tasks.completed_count()
        failure_label = 'some' if migration_tasks.failed() else 'no'
        print('%d migrations completed, %s failures' % (completed, failure_label))

        for task_result in migration_tasks.results:
            if task_result.state == celery.states.FAILURE:
                print('Error: %s' % task_result.result)
            else:
                print('Success: %s' % task_result.result)
    def handle(self, *args, **options):
        '''Process each id given on the command line and print a summary
        of the counters.

        :param args: ids to process; each is passed to
            ``symplectic_to_oe_by_id``
        :param options: command options; ``username`` is required and
            ``password`` is prompted for when empty
        :raises CommandError: when no username is given, or when
            processing the ids fails
        '''
        self.oe_user = User.objects.get(username='******')
        self.options = options
        self.verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        self.v_normal = 1

        # create language code list by name
        codes = language_codes()
        self.lang_codes = dict((v, k) for k, v in codes.items())

        # counters
        self.counts = defaultdict(int)

        # check required options
        if not self.options['username']:
            raise CommandError('Username is required')
        # an empty string is already falsy, so one test covers both cases
        if not self.options['password']:
            self.options['password'] = getpass("Password for %s:" % self.options['username'])

        # connection to repository
        self.repo = Repository(username=self.options['username'], password=self.options['password'])

        try:
            # if ids specified, use that list
            # TODO symplectic query here
            for article_id in args:
                self.counts['total'] += 1
                self.output(1, "Processing %s" % article_id)
                self.symplectic_to_oe_by_id(article_id)
            # TODO when no ids are given: symplectic query to search for
            # Articles (not implemented; nothing is processed)

        except Exception as e:
            # print_exc() writes the traceback itself and returns None, so
            # its result must not be printed
            traceback.print_exc()
            raise CommandError('Error gettings ids (%s)' % e)

        # summarize what was done
        self.stdout.write("\n\n")
        self.stdout.write("Total number selected: %s\n" % self.counts['total'])
        self.stdout.write("Skipped: %s\n" % self.counts['skipped'])
        self.stdout.write("Errors: %s\n" % self.counts['errors'])
        self.stdout.write("Created: %s\n" % self.counts['created'])
Example #53
0
 def setUp(self):
     '''Build and save the fixture object used by the generic view
     tests: DC title, plain-text content and a PNG image.'''
     self.obj = Repository().get_object(type=SimpleDigitalObject)
     self.obj.dc.content.title = 'test object for generic views'
     self.obj.text.content = 'sample plain-text content'
     fixture_png = os.path.join(settings.FEDORA_FIXTURES_DIR, 'test.png')
     self.obj.image.content = open(fixture_png, mode='rb')
     # checksums are forced so response headers can be tested
     checksummed = (self.obj.dc, self.obj.rels_ext,
                    self.obj.text, self.obj.image)
     for datastream in checksummed:
         datastream.checksum_type = 'MD5'
     self.obj.save()
Example #54
0
    def test_index_data(self):
        '''Exercise the index_data view: IP restriction, JSON response,
        basic-auth credentials passed to the repository, and 404 for an
        unknown pid.'''
        # object whose index data will be requested
        fedora = Repository()
        testobj = fedora.get_object(type=SimpleObject)
        testobj.label = 'test object'
        testobj.owner = 'tester'
        testobj.save()
        self.pids.append(testobj.pid)

        # request IP is not in the allowed list
        with override_settings(EUL_INDEXER_ALLOWED_IPS=['0.13.23.134']):
            status = index_data(self.request, testobj.pid).status_code
            self.assertEqual(403, status,
                'Expected %s but returned %s for index_data view with request IP not in configured list' \
                % (403, status))

        # request IP is in the allowed list
        with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
            response = index_data(self.request, testobj.pid)
            status = response.status_code
            self.assertEqual(200, status,
                'Expected %s but returned %s for index_data view' \
                % (200, status))
            mimetype = response['Content-Type']
            self.assertEqual('application/json', mimetype,
                'Expected %s but returned %s for mimetype on index_data view' \
                % ('application/json', mimetype))
            decoded = json.loads(response.content.decode('utf-8'))
            self.assertEqual(
                testobj.index_data(), decoded,
                'Response content loaded from JSON should be equal to object indexdata'
            )

            # basic auth credentials should be handed to the repository
            testuser, testpass = '******', 'testpass'
            credentials = '%s:%s' % (testuser, testpass)
            token = base64.b64encode(force_bytes(credentials))
            auth_header = 'Basic %s' % force_text(token)
            self.request.META['HTTP_AUTHORIZATION'] = auth_header
            repo_path = 'eulfedora.indexdata.views.TypeInferringRepository'
            with patch(repo_path) as typerepo:
                typerepo.return_value.get_object.return_value.index_data.return_value = {}
                index_data(self.request, testobj.pid)
                typerepo.assert_called_with(username=testuser,
                                            password=testpass)

            # a pid that doesn't exist should raise 404
            self.assertRaises(Http404, index_data, self.request,
                              'bogus:testpid')
Example #55
0
    def process_items(self):
        '''Create a bag for each configured item id.

        For every id in ``self.options.item_ids`` the item is looked up in
        the DigWF API, the Fedora url is checked, the Fedora object is
        requested, and a bag is created under ``self.options.output``.
        Any failure is reported on stdout and the item is skipped.
        '''
        digwf_api = Client(self.options.digwf_url)
        repo = Repository(self.options.fedora_url)

        for item_id in self.options.item_ids:
            try:
                result = digwf_api.get_items(item_id=item_id)
            except requests.exceptions.HTTPError as err:
                print('Domokun Connection Error! Unable to query DigWF REST API for %s: %s' % (
                    item_id, err))
                continue

            # make sure Fedora can be reached before going further
            try:
                requests.head(self.options.fedora_url)
            except requests.ConnectionError:
                print('Fedora Connection Error! Unable to query Fedora REST API')
                continue

            if result.count == 0:
                print('No item found for this item id %s' % item_id)
                continue
            if result.count != 1:
                # a lookup by item id shouldn't match more than once, but
                # just in case
                print('Error! DigWF returned %d matches for this item id %s' % \
                    (result.count, item_id))
                continue

            # exactly one match
            item = result.items[0]
            print('Found item %s (pid %s, control key %s, marc %s)' % \
                (item_id, item.pid or '-', item.control_key,
                 item.marc_path))
            try:
                repo.get_object(pid=item.pid)
            except requests.exceptions.HTTPError as err:
                print('Fedora Connection Error! Unable to query Fedora REST API for %s: %s' % (
                    item.pid, err))
                continue

            # create_bag returns a bagit bag object
            newbag = LsdiBaggee(item, repo).create_bag(self.options.output)

            # generate source organization summary for this bag
            # self.load_source_summary(newbag)

            print('Bag created at %s' % newbag)
Example #56
0
def object_tags(request, pid):
    '''Set & display private tags on a particular
    :class:`~eulfedora.models.DigitalObject` (saved in the database by
    way of :class:`~openemory.accounts.models.Bookmark`).

    On an HTTP GET, returns a JSON list of the tags for the specified
    object, or 404 if the object has not been tagged.

    On an HTTP PUT, will replace any existing tags with tags from the
    body of the request.  Uses :meth:`taggit.utils.parse_tags` to
    parse tags, with the same logic :mod:`taggit` uses for parsing
    keyword and phrase tags on forms.  After a successful PUT, returns
    a JSON response with a list of the updated tags (status 201 when a
    new bookmark was created, 200 otherwise).  If the Fedora object does
    not exist, returns a 404 error.
    '''

    # bookmark options that will be used to create a new or find an
    # existing bookmark for either GET or PUT
    bkmark_opts = {'user': request.user, 'pid': pid}

    status_code = 200  # if all goes well, unless creating a new bookmark

    # NOTE(review): only GET and PUT bind ``bookmark``; any other method
    # would fail at the tag lookup below.  Presumably the allowed methods
    # are restricted outside this function -- confirm.
    if request.method == 'PUT':
        # don't allow tagging non-existent objects
        # NOTE: this will 404 if a bookmark is created and an object
        # subsequently is removed or otherwise becomes unavailable in
        # the repository
        repo = Repository(request=request)
        obj = repo.get_object(pid)
        # if this fedora API call becomes expensive, may want to
        # consider querying Solr instead
        if not obj.exists:
            raise Http404

        bookmark, created = Bookmark.objects.get_or_create(**bkmark_opts)
        if created:
            status_code = 201
        bookmark.tags.set(*parse_tags(request.read()))
        # fall through to the shared response code below and display the
        # newly-updated tags

    if request.method == 'GET':
        bookmark = get_object_or_404(Bookmark, **bkmark_opts)

    # GET or successful PUT
    tags = [tag.name for tag in bookmark.tags.all()]
    # content_type (not the deprecated mimetype alias) matches the other
    # HttpResponse calls in this file
    return HttpResponse(json_serializer.encode(tags),
                        status=status_code,
                        content_type='application/json')
Example #57
0
def curl_download_file(pid, dsid):
    '''Download the content of datastream ``dsid`` on object ``pid`` with
    pycurl into a named temporary file, showing a progress bar.

    The temporary file is kept on disk (``delete=False``) and its name is
    printed; the HTTP status and total transfer time are printed once the
    transfer completes.
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    tmpfile = tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                          delete=False)
    print('writing to ', tmpfile.name)

    widgets = [
        'Download: ',
        progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.Bar(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.FileTransferSpeed()
    ]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    def progress(dl_total, dl, up_total, up):
        # update current status
        pbar.update(dl)

    c = pycurl.Curl()
    # Release the curl handle and flush/close the temp file even when the
    # transfer raises.
    try:
        auth = base64.b64encode(
            force_bytes("%s:%s" %
                        (testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)))
        headers = {'Authorization': 'Basic %s' % force_text(auth)}
        c.setopt(pycurl.VERBOSE, 1)
        c.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

        # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
        c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' % \
            (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
        # c.setopt(c.WRITEDATA, buffer)
        c.setopt(c.WRITEFUNCTION, tmpfile.write)
        c.setopt(c.XFERINFOFUNCTION, progress)
        c.setopt(c.NOPROGRESS, False)
        c.perform()

        # HTTP response code, e.g. 200.
        print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
        # Elapsed time for the transfer.
        print('Time: %f' % c.getinfo(c.TOTAL_TIME))
    finally:
        c.close()
        tmpfile.close()
Example #58
0
    def cascade_updated_articles(self):
        '''Reindex every article associated with an updated faculty member
        (article-indexed person data changed, or a previously-indexed
        faculty member is no longer in ESD).
        '''
        # collect pids in a set so an article shared by several updated
        # faculty members is only indexed once
        updated_articles = set(
            article['pid']
            for username in self.updated_faculty
            for article in self.articles_by_faculty(username))

        repo = Repository()
        for pid in updated_articles:
            if self.verbosity >= self.v_all:
                print('Indexing article %s' % pid)
            self.solr.add(repo.get_object(pid, type=Article).index_data())