def test_init_retries(self):
    '''Check how the ``retries`` option ends up on a new
    :class:`Repository` instance.'''
    conn_args = ('http://fedo.ra', 'user', 'passwd')

    # nothing specified: the instance value matches the class attribute
    default_repo = Repository(*conn_args)
    self.assertEqual(Repository.retries, default_repo.retries)

    # an explicit number is stored as given
    numbered_repo = Repository(*conn_args, retries=5)
    self.assertEqual(5, numbered_repo.retries)

    # an explicit None (no retries) is stored as given
    no_retry_repo = Repository(*conn_args, retries=None)
    self.assertEqual(None, no_retry_repo.retries)
def download_file(pid, dsid):
    '''Download one datastream to a temporary file, showing a progress bar.

    Connects to the repository configured in ``testsettings``, reads the
    content of datastream ``dsid`` on object ``pid`` chunk by chunk and
    writes it to a named temporary file.  The file is created with
    ``delete=False`` so it is left on disk; its path is printed.

    :param pid: pid of the object that owns the datastream
    :param dsid: id of the datastream to download
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    widgets = [
        'Download: ', progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.Bar(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.FileTransferSpeed()
    ]
    # set progressbar size based on the size reported for the datastream
    pbar = progressbar.ProgressBar(widgets=widgets,
                                   max_value=ds.size).start()

    # download content to a tempfile
    tmpfile = tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                          delete=False)
    print('writing to ', tmpfile.name)
    size_read = 0
    try:
        for chunk in ds.get_chunked_content():
            size_read += len(chunk)
            pbar.update(size_read)
            tmpfile.write(chunk)
    finally:
        # always flush and release the file handle, even when the
        # download fails part way; errors still propagate to the caller
        tmpfile.close()
def datastream_etag(request, pid, dsid, type=None, repo=None,
                    accept_range_request=False, **kwargs):
    '''Method suitable for use as an etag function with
    :class:`django.views.decorators.http.condition`.  Takes the same
    arguments as :meth:`~eulfedora.views.raw_datastream`.

    :param request: request object; only ``request.META`` is consulted
    :param pid: pid of the object that owns the datastream
    :param dsid: id of the datastream
    :param type: optional object type passed through to ``get_object``
    :param repo: optional repository to use; a default ``Repository()``
        is created when omitted
    :param accept_range_request: when true, no etag is returned for
        partial (range) requests
    :returns: the datastream checksum, or None when no etag applies
        (partial range request, missing datastream, checksums disabled,
        or the repository request failed)
    '''
    # if a range is requested and it is not for the entire file,
    # do *NOT* return an etag
    # NOTE: HTTP byte ranges are zero-based, so the range that covers the
    # entire file is ``bytes=0-`` (``bytes=1-`` skips the first byte)
    if accept_range_request:
        requested_range = request.META.get('HTTP_RANGE', None)
        if requested_range and requested_range != 'bytes=0-':
            return None

    try:
        if repo is None:
            repo = Repository()
        get_obj_opts = {}
        if type is not None:
            get_obj_opts['type'] = type
        obj = repo.get_object(pid, **get_obj_opts)
        ds = obj.getDatastreamObject(dsid)
        if ds and ds.exists and ds.checksum_type != 'DISABLED':
            return ds.checksum
    except RequestFailed:
        # deliberate best effort: a failed repository request simply
        # means there is no etag to offer
        pass

    return None
def volume_modified(request, pid):
    'last modification time for a single volume'
    # NOTE: the solr index timestamp is used rather than the object's own
    # last-modified date: an index change may mean the object changed, and
    # the index timestamp for a volume is updated when pages are added
    volume_query = solr_interface().query(
        content_model=VolumeV1_0.VOLUME_CONTENT_MODEL, pid=pid)
    indexed = volume_query.sort_by('-timestamp').field_limit('timestamp')

    # for a logged in user the page should also show as modified when
    # the annotation count changes
    newest_annotation = None
    if request.user.is_authenticated():
        # NOTE: initializing the volume here shouldn't be very expensive;
        # it is only used to get the volume uri and associated annotations
        volume = Repository().get_object(pid, type=Volume)
        # newest annotation creation time for pages in this volume
        visible_notes = volume.annotations().visible_to(request.user)
        newest_annotation = visible_notes.last_created_time()

    if indexed.count():
        index_time = indexed[0]['timestamp']
    else:
        index_time = None
    return solrtimestamp_or_datetime(index_time, newest_annotation)
def get(self, request):
    '''Handle an unAPI GET request.

    Reads ``id`` and ``format`` from the query string:

    * no ``id``: render the list of formats available for all items
    * ``id`` only: render the list of formats for that item
    * ``id`` and ``format``: return the item in the requested format,
      or a 406 response when the item does not offer that format

    :param request: the current request
    :returns: an ``HttpResponse``
    '''
    context = {}
    item_id = request.GET.get('id', None)
    fmt = request.GET.get('format', None)

    if item_id is not None:
        context['id'] = item_id
        repo = Repository(request=self.request)
        # generalized class-based view would need probably a get-item
        # method for repo objects, could use type-inferring repo variant
        obj = repo.get_object(item_id, type=Volume)

        formats = obj.unapi_formats
        if fmt is None:
            # display formats for this item
            context['formats'] = formats
        else:
            try:
                current_format = formats[fmt]
            except KeyError:
                # unknown format name supplied by the client: answer
                # "not acceptable" instead of failing with a server error
                return HttpResponse(status=406)
            # return requested format for this item
            meth = getattr(obj, current_format['method'])
            return HttpResponse(meth(), content_type=current_format['type'])
    else:
        # display formats for all items
        # NOTE: if multiple classes, should be able to combine the formats
        context['formats'] = Volume.unapi_formats

    # NOTE: doesn't really even need to be a template, could be generated
    # with eulxml just as easily if that simplifies reuse
    return render(request, 'books/unapi_format.xml', context,
                  content_type='application/xml')
def setUp(self):
    '''Ingest a text file object and an image file object for the tests,
    and work out the edit/download/view urls for the text object.'''
    # instantiate repo_admin the first time we run, after the test
    # settings are in place
    if self.repo_admin is None:
        self.repo_admin = Repository(
            username=getattr(settings, 'FEDORA_TEST_USER', None),
            password=getattr(settings, 'FEDORA_TEST_PASSWORD', None))
    self.client = Client()

    # create a file object to edit
    # NOTE: fixtures are opened in binary mode so the bytes sent to the
    # repository are exactly the bytes on disk (no newline translation
    # or text decoding), matching the md5 checksum that is set below
    with open(self.ingest_fname, 'rb') as ingest_f:
        self.obj = self.repo_admin.get_object(type=FileObject)
        self.obj.dc.content.title = self.obj.label = 'Test file object'
        self.obj.dc.content.date = '2011'
        self.obj.master.content = ingest_f
        self.obj.master.label = 'hello-world.txt'
        self.obj.master.checksum = self.ingest_md5sum
        # save while the fixture file is still open
        self.obj.save()

    self.edit_url = reverse('file:edit', kwargs={'pid': self.obj.pid})
    self.download_url = reverse('file:download', kwargs={'pid': self.obj.pid})
    self.view_url = reverse('file:view', kwargs={'pid': self.obj.pid})

    # create a image object for testing
    with open(self.image_fname, 'rb') as ingest_f:
        self.imgobj = self.repo_admin.get_object(type=FileObject)
        self.imgobj.dc.content.title = self.imgobj.label = 'Test file object'
        self.imgobj.master.content = ingest_f
        self.imgobj.master.label = 'test.jpg'
        self.imgobj.master.checksum = self.image_md5sum
        self.imgobj.save()

    # pids of the objects created here
    self.pids = [self.obj.pid, self.imgobj.pid]
def get_queryset(self):
    '''Return the solr pages for the volume identified by the ``pid``
    url keyword argument; raise 404 if it does not exist or is not a
    volume.'''
    self.repo = Repository(request=self.request)
    # keep the volume on the view for use in get_context_data
    self.vol = self.repo.get_object(self.kwargs['pid'], type=Volume)
    volume = self.vol
    if not (volume.exists and volume.is_a_volume):
        raise Http404
    return volume.find_solr_pages()
def process(self, input):
    """
    Save the input image to a repository datastream and pass the
    input through unchanged.

    The image is encoded in the format named by the ``format``
    parameter and stored on the object identified by the ``pid``
    parameter.  Returns None when there is no input.
    """
    if input is None:
        return

    params = self._params
    repo = Repository(params.get("url"),
                      params.get("username"),
                      params.get("password"))

    # encode the image into an in-memory buffer
    try:
        buf = StringIO()
        image = Image.fromarray(input)
        image.save(buf, params.get("format").upper())
    except IOError:
        message = "Error obtaining image buffer in format: %s" % \
            params.get("format").upper()
        raise exceptions.NodeError(message, self)

    # NOTE(review): the buffer is handed over without being rewound;
    # confirm the datastream code does not read from the current position
    proxy_class = get_fedora_proxy_class(params.get("dsid"))
    target = repo.get_object(params.get("pid"), type=proxy_class)
    datastream = target.DATASTREAM
    datastream.content = buf
    datastream.label = "Test Ingest Datastream 1"
    datastream.mimetype = "image/%s" % params.get("format")
    target.save()
    return input
def upload_file(filename):
    '''Upload a local file to the repository, showing a progress bar.

    Connects to the repository configured in ``testsettings``, uploads
    the file through ``repo.api.upload`` and prints the upload id that
    is returned.

    :param filename: path of the file to upload
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)

    filesize = os.path.getsize(filename)
    widgets = [
        'Upload: ', progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.Bar(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.FileTransferSpeed()
    ]
    # set initial progressbar size based on file; will be slightly larger
    # because of multipart boundary content
    # NOTE: ``max_value`` is the same attribute the callback below
    # adjusts; the older ``maxval`` spelling is deprecated
    pbar = progressbar.ProgressBar(widgets=widgets,
                                   max_value=filesize).start()

    def upload_callback(monitor):
        # update the progressbar to actual maxval (content + boundary)
        pbar.max_value = monitor.len
        # update current status
        pbar.update(monitor.bytes_read)

    with open(filename, 'rb') as f:
        upload_id = repo.api.upload(f, callback=upload_callback)
    pbar.finish()
    print(upload_id)
def remove_test_objects(self):
    '''Purge any leftover test objects before or after running tests.

    Finds every object whose pid matches the current
    ``settings.FEDORA_PIDSPACE`` and purges it; failures to purge an
    individual object are logged and skipped.  A summary is written to
    stderr when at least one object was removed.

    NOTE: This method expects to be called only when FEDORA_PIDSPACE has
    been switched to a test pidspace.
    '''
    # use test fedora credentials if they are set
    repo = Repository(root=getattr(settings, 'FEDORA_TEST_ROOT', None),
                      username=getattr(settings, 'FEDORA_TEST_USER', None),
                      password=getattr(settings, 'FEDORA_TEST_PASSWORD', None))
    test_objects = repo.find_objects(
        pid__contains='%s:*' % settings.FEDORA_PIDSPACE)

    count = 0
    for obj in test_objects:
        # if objects are unexpectedly not being cleaned up, pid/label may
        # help to isolate which test is creating the leftover objects
        try:
            repo.purge_object(obj.pid, "removing test object")
        except RequestFailed:
            # best effort: report and carry on with the next object
            logger.warning('Error purging test object %s', obj.pid)
        else:
            # NOTE: not displaying label because we may not have
            # permission to access it
            logger.info('Purged test object %s', obj.pid)
            count += 1

    if count:
        # written directly to stderr so this works the same on
        # Python 2 and Python 3
        sys.stderr.write("Removed %s test object(s) with pidspace %s\n"
                         % (count, settings.FEDORA_PIDSPACE))
def get_object(self, queryset=None):
    '''Return the volume identified by the ``pid`` url keyword argument,
    or raise 404 if it does not exist or is not a volume.'''
    # the pid comes from the configured url pattern
    volume = Repository(request=self.request).get_object(
        self.kwargs['pid'], type=Volume)
    if volume.exists and volume.is_a_volume:
        return volume
    raise Http404
def setUp(self):
    '''Connect to the repository configured in settings and ingest a
    pdf test object (kept on ``self.pdfobj``).'''
    self.repo = Repository(settings.FEDORA_ROOT, settings.FEDORA_USER,
                           settings.FEDORA_PASSWORD)
    # a pdf is binary content: open it in binary mode so the bytes are
    # never decoded or newline-translated on the way to the repository
    with open(self.pdf_filepath, 'rb') as pdf:
        self.pdfobj = self.repo.get_object(type=TestPdfObject)
        self.pdfobj.label = 'eulindexer test pdf object'
        self.pdfobj.pdf.content = pdf
        # save while the fixture file is still open
        self.pdfobj.save()
def _raw_datastream(request, pid, dsid, repo=None, headers=None,
                    as_of_date=None):
    '''Version of :meth:`raw_datastream` without conditionals, for use
    in class-based views or elsewhere.

    Requests the datastream content from the repository (headers only
    for a HEAD request, streamed otherwise), then copies the status code
    and response headers onto a Django response.  Any ``headers`` passed
    in override the repository headers.  Raises ``Http404`` when the
    repository reports a 404; other request failures are re-raised.
    '''
    if repo is None:
        repo = Repository()

    # pass a range request, when present, through to fedora
    meta = request.META
    range_headers = {'RANGE': meta['HTTP_RANGE']} if 'HTTP_RANGE' in meta else {}

    head_only = request.method == 'HEAD'
    api_opts = {'asOfDateTime': as_of_date, 'rqst_headers': range_headers}
    # HEAD requests only need headers; everything else streams content
    api_opts['head' if head_only else 'stream'] = True

    try:
        response = repo.api.getDatastreamDissemination(pid, dsid, **api_opts)
        if head_only:
            dj_response = HttpResponse()
        else:
            dj_response = StreamingHttpResponse(response.iter_content(4096))
    except RequestFailed as rf:
        # anything other than object-not-found is raised unchanged
        if rf.code != 404:
            raise
        # object not found becomes a generic django 404
        raise Http404

    # django response code should match the fedora code,
    # e.g. error code or 206 partial content for range requests
    dj_response.status_code = response.status_code

    # start from the fedora response headers; headers passed in by the
    # caller take precedence
    merged = response.headers
    if headers is not None:
        merged.update(headers)
    # etag needn't always be content md5, but for fedora datastreams it is
    if 'ETag' in merged:
        merged['Content-MD5'] = merged['ETag']
    for name in merged:
        dj_response[name] = merged[name]

    return dj_response
def rdfxml(request, aggId):
    '''Serve the rdfxml datastream of an aggregation object as an
    attachment, named after the basename of the object's dc title.'''
    dsid = AggregationObject.rdfxml.id
    aggregation = Repository().get_object(aggId, type=AggregationObject)
    download_name = os.path.basename(aggregation.dc.content.title)
    # NOTE(review): the filename is not quoted in the header; confirm
    # that titles never contain spaces or other separator characters
    disposition = "attachment; filename=%s" % download_name
    return raw_datastream(request, aggId, dsid, type=AggregationObject,
                          headers={'Content-Disposition': disposition})
def __init__(self, pid, repocls=None, server="Development"):
    '''Set up the repository connection and load the object for ``pid``.

    :param pid: pid of the object to work with
    :param repocls: repository to use; when not supplied, one is built
        from the configuration for ``server``
    :param server: name of the server configuration to look up when no
        repository is supplied
    '''
    self.repo = repocls
    # check the repository we were actually given; the fallback is built
    # only when the caller did not supply one
    if not self.repo:
        # NOTE(review): ``repo`` here is presumably a module-level name
        # that provides Get_Configs -- confirm against the file's imports
        username, password, root = repo.Get_Configs(server)
        self.repo = Repository(root=root, username=username,
                               password=password)
    self.pid = pid
    self.GetObject()
def all():
    """
    Find every object in the repository with the collection content
    model and return them as
    :class:`~genrepo.collection.models.CollectionObject`
    """
    return Repository().get_objects_with_cmodel(
        CollectionObject.COLLECTION_CONTENT_MODEL, type=CollectionObject)
def file(request, pid):
    '''Serve the file datastream of a file object as an attachment,
    named after the basename of the object's dc title.'''
    dsid = FileObject.file.id
    file_obj = Repository().get_object(pid, type=FileObject)
    download_name = os.path.basename(file_obj.dc.content.title)
    # NOTE(review): the filename is not quoted in the header; confirm
    # that titles never contain spaces or other separator characters
    disposition = "attachment; filename=%s" % download_name
    return raw_datastream(request, pid, dsid, type=FileObject,
                          headers={'Content-Disposition': disposition})
def datastream_lastmodified(request, pid, dsid, type):
    '''Return the ``created`` value of the requested datastream, or None
    if the datastream does not exist or the repository request fails.'''
    repo = Repository()
    try:
        datastream = repo.get_object(pid, type=type).getDatastreamObject(dsid)
        if not datastream or not datastream.exists:
            return None
        return datastream.created
    except RequestFailed:
        # no usable value when the repository request fails
        return None
def get_object(self, queryset=None):
    '''Return the collection identified by the ``pid`` url keyword
    argument; 404 if the pid doesn't exist or isn't a collection.'''
    # the pid comes from the configured url pattern
    collection = Repository(request=self.request).get_object(
        self.kwargs['pid'], type=Collection)
    is_collection = collection.exists and \
        collection.has_requisite_content_models
    if not is_collection:
        raise Http404
    return collection
def setUp(self):
    '''Set up a repository connection and make sure a solr server url
    is configured, remembering the value that was originally set.'''
    super(KeepTestCase, self).setUp()
    self.repo = Repository()

    # NOTE: we should use django.test.utils override_settings for this
    # (not available until django 1.4)
    configured_url = getattr(settings, 'SOLR_SERVER_URL', None)
    self._solr_server_url = configured_url
    if configured_url is None:
        # sunburnt solr initialization expects *something* to be set
        settings.SOLR_SERVER_URL = 'http://localhost:919191/solr/'
def get_object(self, queryset=None):
    '''Return the volume identified by the ``pid`` url keyword argument;
    404 if the object doesn't exist, isn't a volume, or doesn't have
    tei.'''
    # the pid comes from the configured url pattern
    repo = Repository(request=self.request)
    volume = repo.get_object(self.kwargs['pid'], type=Volume)
    exportable = volume.exists and volume.is_a_volume and volume.has_tei
    if not exportable:
        raise Http404
    # NOTE: not currently an error if volume doesn't have any
    # annotations, but export is probably not meaningful
    return volume
def members(self):
    '''Generate the Fedora objects in the repository that are related
    to the current collection via isMemberOfCollection.'''
    # FIXME: loses repo permissions/credentials here...
    repo = Repository()
    fetch = repo.get_object
    member_pids = repo.risearch.get_subjects(relsext.isMemberOfCollection,
                                             self.uri)
    # for now, members are returned as generic DigitalObject instances
    # TODO: should we restrict to accessible objects only?
    # (requires passing correct credentials through...)
    for member_pid in member_pids:
        yield fetch(member_pid)
def view_collection(request, pid):
    '''Display an existing
    :class:`~genrepo.collection.models.CollectionObject` identified by
    pid.
    '''
    collection = Repository(request=request).get_object(
        pid, type=CollectionObject)
    # 404 when the object does not exist, or the current user doesn't
    # have permission to see that it exists
    if collection.exists:
        return render(request, 'collection/view.html', {'obj': collection})
    raise Http404
def handle(self, *args, **kwargs):
    '''Queue migration of AFF disk images and report the results.

    Pids given in the ``pids`` option are migrated; when none are given,
    every disk image object whose premis names the AFF format and that
    has not already been migrated is selected.  One celery task is
    queued per pid, and a success/failure summary is printed once all
    tasks have finished.
    '''
    # pids specified on command-line take precedence
    # NOTE: copied into a fresh list so a missing/None option is
    # handled and the caller's option list is never modified
    pids = list(kwargs.get('pids') or [])

    repo = Repository()
    # if no pids were specified, find all AFFs
    if not pids:
        objs = repo.get_objects_with_cmodel(
            DiskImage.DISKIMAGE_CONTENT_MODEL, type=DiskImage)
        for obj in objs:
            # objects found by risearch *should* exist, but
            # just in case of discrepancies (hopefully only in QA),
            # ignore non-existent objects
            if not obj.exists:
                self.stderr.write(
                    self.style.WARNING('%s does not exist' % obj.pid))
                continue

            # check premis to find Disk Images in AFF format;
            # exclude any that have already been migrated
            if obj.provenance.exists:
                premis = obj.provenance.content
                if premis.object and premis.object.format \
                        and premis.object.format.name == 'AFF' \
                        and not obj.migrated:
                    pids.append(obj.pid)

    # create a celery result set and queue conversion of each pid
    # requested or found in fedora
    migration_tasks = celery.result.ResultSet([])
    for pid in pids:
        migration_tasks.add(migrate_aff_diskimage.delay(pid))

    # wait for tasks to complete
    while migration_tasks.waiting():
        try:
            migration_tasks.join()
        except Exception:
            # exceptions from tasks get propagated here, but ignore
            # them and report based on success/failure
            pass

    print('%d migrations completed, %s failures' %
          (migration_tasks.completed_count(),
           'some' if migration_tasks.failed() else 'no'))

    for result in migration_tasks.results:
        if result.state == celery.states.FAILURE:
            print('Error: %s' % result.result)
        else:
            print('Success: %s' % result.result)
def handle(self, *args, **options):
    '''Process the ids given on the command line and print a summary.

    Requires the ``username`` option; prompts for a password when the
    ``password`` option is empty.  Each positional argument is treated
    as an id and passed to ``symplectic_to_oe_by_id``.

    :raises CommandError: when no username is given, or when processing
        the ids fails
    '''
    self.oe_user = User.objects.get(username='******')
    self.options = options
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # create language code list by name
    codes = language_codes()
    self.lang_codes = dict((v, k) for k, v in codes.items())

    # counters
    self.counts = defaultdict(int)

    # check required options
    if not self.options['username']:
        raise CommandError('Username is required')
    if not self.options['password']:
        self.options['password'] = getpass(
            "Password for %s:" % self.options['username'])

    # connection to repository
    self.repo = Repository(username=self.options['username'],
                           password=self.options['password'])

    try:
        # if ids specified, use that list
        # TODO symplectic query here
        # TODO when no ids are given: search for Articles
        #      (symplectic query here)
        for item_id in args:
            self.counts['total'] += 1
            self.output(1, "Processing %s" % item_id)
            self.symplectic_to_oe_by_id(item_id)
    except Exception as e:
        # print_exc writes the traceback itself and returns None
        traceback.print_exc()
        raise CommandError('Error gettings ids (%s)' % e)

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % self.counts['total'])
    self.stdout.write("Skipped: %s\n" % self.counts['skipped'])
    self.stdout.write("Errors: %s\n" % self.counts['errors'])
    self.stdout.write("Created: %s\n" % self.counts['created'])
def process_items(self):
    '''Look up each requested item id in DigWF and create a bag for it.

    Items are skipped, with a message, when the DigWF lookup fails,
    when Fedora cannot be reached, when the lookup does not return
    exactly one match, or when the Fedora object request fails.
    '''
    opts = self.options
    digwf_api = Client(opts.digwf_url)
    repo = Repository(opts.fedora_url)

    for item_id in opts.item_ids:
        try:
            result = digwf_api.get_items(item_id=item_id)
        except requests.exceptions.HTTPError as err:
            print('Domokun Connection Error! Unable to query DigWF REST API for %s: %s' % (
                item_id, err))
            continue

        # make sure fedora is reachable before going any further
        try:
            requests.head(opts.fedora_url)
        except requests.ConnectionError:
            print('Fedora Connection Error! Unable to query Fedora REST API')
            continue

        match_count = result.count
        if match_count == 0:
            print('No item found for this item id %s' % item_id)
            continue
        if match_count != 1:
            # shouldn't get more than one match when looking up by
            # item id, but just in case
            print('Error! DigWF returned %d matches for this item id %s' %
                  (match_count, item_id))
            continue

        item = result.items[0]
        print('Found item %s (pid %s, control key %s, marc %s)' %
              (item_id, item.pid or '-', item.control_key, item.marc_path))
        try:
            repo.get_object(pid=item.pid)
        except requests.exceptions.HTTPError as err:
            print('Fedora Connection Error! Unable to query Fedora REST API for %s: %s' % (
                item.pid, err))
            continue

        # create_bag returns a bagit bag object
        newbag = LsdiBaggee(item, repo).create_bag(opts.output)
        # generate source organization summary for this bag
        # self.load_source_summary(newbag)
        print('Bag created at %s' % newbag)
def setUp(self):
    '''Ingest a simple object with text and image content for the view
    tests (kept on ``self.obj``).'''
    # load test object to test views with
    repo = Repository()
    self.obj = repo.get_object(type=SimpleDigitalObject)
    self.obj.dc.content.title = 'test object for generic views'
    self.obj.text.content = 'sample plain-text content'
    img_file = os.path.join(settings.FEDORA_FIXTURES_DIR, 'test.png')
    # keep the image fixture open only for as long as it takes to save
    # the object, so the file handle is not leaked by every test
    with open(img_file, mode='rb') as img:
        self.obj.image.content = img
        # force datastream checksums so we can test response headers
        for ds in [self.obj.dc, self.obj.rels_ext, self.obj.text,
                   self.obj.image]:
            ds.checksum_type = 'MD5'
        self.obj.save()
def test_index_data(self):
    '''Exercise the index_data view: IP restriction, JSON response,
    basic-auth credentials pass-through and 404 for an unknown pid.'''
    # ingest an object for the index data view to report on
    testobj = Repository().get_object(type=SimpleObject)
    testobj.label = 'test object'
    testobj.owner = 'tester'
    testobj.save()
    self.pids.append(testobj.pid)
    pid = testobj.pid

    # request IP is not allowed to access the service
    with override_settings(EUL_INDEXER_ALLOWED_IPS=['0.13.23.134']):
        response = index_data(self.request, pid)
        expected, got = 403, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s but returned %s for index_data view with request IP not in configured list' \
            % (expected, got))

    # request IP is allowed to hit the service
    with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
        response = index_data(self.request, pid)

        expected, got = 200, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s but returned %s for index_data view' \
            % (expected, got))

        expected, got = 'application/json', response['Content-Type']
        self.assertEqual(
            expected, got,
            'Expected %s but returned %s for mimetype on index_data view' \
            % (expected, got))

        response_data = json.loads(response.content.decode('utf-8'))
        self.assertEqual(
            testobj.index_data(), response_data,
            'Response content loaded from JSON should be equal to object indexdata')

    # credentials sent via basic auth are handed to the repository
    testuser, testpass = '******', 'testpass'
    credentials = force_bytes('%s:%s' % (testuser, testpass))
    token = base64.b64encode(credentials)
    self.request.META['HTTP_AUTHORIZATION'] = 'Basic %s' % force_text(token)

    with patch('eulfedora.indexdata.views.TypeInferringRepository') as typerepo:
        fake_obj = typerepo.return_value.get_object.return_value
        fake_obj.index_data.return_value = {}
        index_data(self.request, pid)
        typerepo.assert_called_with(username=testuser, password=testpass)

    # non-existent pid should generate a 404
    self.assertRaises(Http404, index_data, self.request, 'bogus:testpid')
def object_tags(request, pid):
    '''Set & display private tags on a particular
    :class:`~eulfedora.models.DigitalObject` (saved in the database by
    way of :class:`~openemory.accounts.models.Bookmark`).

    On an HTTP GET, returns a JSON list of the tags for the specified
    object, or 404 if the object has not been tagged.

    On an HTTP PUT, will replace any existing tags with tags from the
    body of the request.  Uses :meth:`taggit.utils.parse_tags` to parse
    tags, with the same logic :mod:`taggit` uses for parsing keyword and
    phrase tags on forms.  After a successful PUT, returns a JSON
    response with a list of the updated tags (status 201 when a new
    bookmark was created).  If the Fedora object does not exist, returns
    a 404 error.

    NOTE(review): only GET and PUT are handled here; presumably other
    methods are rejected before this view runs -- confirm.
    '''
    # bookmark options that will be used to create a new or find an
    # existing bookmark for either GET or PUT
    bkmark_opts = {'user': request.user, 'pid': pid}

    status_code = 200   # if all goes well, unless creating a new bookmark

    if request.method == 'PUT':
        # don't allow tagging non-existent objects
        # NOTE: this will 404 if a bookmark is created and an object
        # subsequently is removed or otherwise becomes unavailable in
        # the repository
        repo = Repository(request=request)
        obj = repo.get_object(pid)
        # if this fedora API call becomes expensive, may want to
        # consider querying Solr instead
        if not obj.exists:
            raise Http404

        bookmark, created = Bookmark.objects.get_or_create(**bkmark_opts)
        if created:
            status_code = 201
        bookmark.tags.set(*parse_tags(request.read()))
        # fall through to GET handling and display the newly-updated tags

    if request.method == 'GET':
        bookmark = get_object_or_404(Bookmark, **bkmark_opts)

    # GET or successful PUT
    tags = [tag.name for tag in bookmark.tags.all()]
    # content_type is accepted by every Django version; the older
    # ``mimetype`` keyword was removed in Django 1.7
    return HttpResponse(json_serializer.encode(tags), status=status_code,
                        content_type='application/json')
def curl_download_file(pid, dsid):
    '''Download one datastream to a temporary file using pycurl, showing
    a progress bar.

    The datastream size is looked up through the repository configured
    in ``testsettings``; the content itself is fetched from the REST
    content url with basic auth and written to a named temporary file
    (created with ``delete=False``; its path is printed).  The HTTP
    status and elapsed time are printed when the transfer completes.

    :param pid: pid of the object that owns the datastream
    :param dsid: id of the datastream to download
    '''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    tmpfile = tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                          delete=False)
    print('writing to ', tmpfile.name)

    widgets = [
        'Download: ', progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.Bar(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.FileTransferSpeed()
    ]
    # set progressbar size based on the size reported for the datastream
    pbar = progressbar.ProgressBar(widgets=widgets,
                                   max_value=ds.size).start()

    def progress(dl_total, dl, up_total, up):
        # update current status
        pbar.update(dl)

    c = pycurl.Curl()
    try:
        auth = base64.b64encode(
            force_bytes("%s:%s" % (testsettings.FEDORA_USER,
                                   testsettings.FEDORA_PASSWORD)))
        headers = {'Authorization': 'Basic %s' % force_text(auth)}
        c.setopt(pycurl.VERBOSE, 1)
        c.setopt(pycurl.HTTPHEADER,
                 ["%s: %s" % t for t in headers.items()])
        # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
        c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' % \
                 (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
        c.setopt(c.WRITEFUNCTION, tmpfile.write)
        c.setopt(c.XFERINFOFUNCTION, progress)
        c.setopt(c.NOPROGRESS, False)
        c.perform()

        # HTTP response code, e.g. 200.
        print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
        # Elapsed time for the transfer.
        print('Time: %f' % c.getinfo(c.TOTAL_TIME))
    finally:
        # release the curl handle and flush/close the output file even
        # when the transfer fails; errors still propagate to the caller
        c.close()
        tmpfile.close()