def ingest(self, book_obj, page_num):
    '''
    Create (or re-create) the Fedora object for a single page of a book.

    Any existing object under the derived page PID is purged first, then a
    fresh object is created, given a POLICY datastream, populated from the
    page's entries in the parent's objMeta, and linked to the parent book.

    book_obj: parent book; this method reads its `pid`, `ohandle`, `objMeta`
        and `pages_from_objMeta`.
    page_num: key into `book_obj.pages_from_objMeta`; also written as the
        pageOrder relationship.

    Returns the result of the final `self.ohandle.save()`.
    '''
    self.book_obj = book_obj

    # datastream descriptions for this page, from the parent book's objMeta
    page_dict = self.book_obj.pages_from_objMeta[page_num]

    npid = "wayne:%s_Page_%s" % (self.book_obj.pid.split(":")[1], page_num)

    # start from a clean slate: purge whatever already lives at this PID
    self.ohandle = fedora_handle.get_object(npid)
    if self.ohandle.exists:
        fedora_handle.purge_object(self.ohandle)
    self.ohandle = fedora_handle.get_object(npid, create=True)
    self.ohandle.save()

    self.ohandle.label = "%s - Page %s" % (self.book_obj.ohandle.label, page_num)

    # POLICY datastream points at the policy object's POLICY_XML content
    # NOTE: 'E' management type required, not 'R'
    policy_uri = self.book_obj.objMeta['policy']
    print("Using policy: %s" % (policy_uri,))
    policy_suffix = policy_uri.split("info:fedora/")[1]
    policy_handle = eulfedora.models.DatastreamObject(self.ohandle, "POLICY", "POLICY", mimetype="text/xml", control_group="E")
    policy_handle.ds_location = "http://localhost/fedora/objects/%s/datastreams/POLICY_XML/content" % (policy_suffix)
    policy_handle.label = "POLICY"
    policy_handle.save()

    # hand each datastream entry to the processor matching its id prefix
    # (the prefixes are mutually exclusive, so at most one branch fires)
    for ds in page_dict:
        ds_id = ds['ds_id']
        if ds_id.startswith('IMAGE'):
            self.processImage(ds)
        elif ds_id.startswith('HTML'):
            self.processHTML(ds)
        elif ds_id.startswith('ALTOXML'):
            self.processALTOXML(ds)

    # RDF relationships: content model, parent book, and page order
    self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#hasContentModel", "info:fedora/CM:WSUebook_Page")
    self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#isConstituentOf", "info:fedora/%s" % self.book_obj.ohandle.pid)
    self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageOrder", page_num)

    return self.ohandle.save()
def ingestMissingPage(self, book_obj, page_num, from_bag=True):
    '''
    Create a placeholder Fedora page object for a page missing from a book.

    The object gets a POLICY datastream, a placeholder HTML datastream, a
    placeholder image (via `self.processImage(..., exists=False)`), an empty
    ALTOXML layout, and the usual page relationships plus `pageExists` set
    to False.

    book_obj: parent book; this method reads its `pid`, `ohandle` and `objMeta`.
    page_num: page number used in the PID, label and pageOrder relationship.
    from_bag: passed through unchanged to `self.processImage`.

    Returns the result of the final `self.ohandle.save()`.
    '''
    self.book_obj = book_obj

    npid = "wayne:%s_Page_%s" % (self.book_obj.pid.split(":")[1], page_num)

    # start from a clean slate: purge whatever already lives at this PID
    self.ohandle = fedora_handle.get_object(npid)
    if self.ohandle.exists:
        fedora_handle.purge_object(self.ohandle)
    self.ohandle = fedora_handle.get_object(npid, create=True)
    self.ohandle.save()

    self.ohandle.label = "%s - Page %s" % (self.book_obj.ohandle.label, page_num)

    # POLICY datastream points at the policy object's POLICY_XML content
    # NOTE: 'E' management type required, not 'R'
    policy_uri = self.book_obj.objMeta['policy']
    print("Using policy: %s" % (policy_uri,))
    policy_suffix = policy_uri.split("info:fedora/")[1]
    policy_handle = eulfedora.models.DatastreamObject(self.ohandle, "POLICY", "POLICY", mimetype="text/xml", control_group="E")
    policy_handle.ds_location = "http://localhost/fedora/objects/%s/datastreams/POLICY_XML/content" % (policy_suffix)
    policy_handle.label = "POLICY"
    policy_handle.save()

    print("Processing HTML placeholder")
    generic_handle = eulfedora.models.FileDatastreamObject(self.ohandle, "HTML", "HTML", mimetype="text/html", control_group='M')
    generic_handle.label = "HTML"
    generic_handle.content = "<p>[Page %s Intentionally Left Blank]</p>" % (page_num)
    generic_handle.save()

    print("Processing IMAGE placeholder")
    # passes 'from_bag' param
    self.processImage(None, exists=False, page_num=page_num, from_bag=from_bag)

    # RDF relationships: content model, parent book, page order, and the
    # marker that this page does not really exist
    self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#hasContentModel", "info:fedora/CM:WSUebook_Page")
    self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#isConstituentOf", "info:fedora/%s" % self.book_obj.ohandle.pid)
    self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageOrder", page_num)
    self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageExists", False)

    print("Processing ALTOXML placeholder")
    generic_handle = eulfedora.models.FileDatastreamObject(self.ohandle, 'ALTOXML', 'ALTOXML', mimetype="text/xml", control_group='M')
    generic_handle.label = 'ALTOXML'
    # The template's placeholders are HEIGHT first, then WIDTH, so the values
    # must be supplied in that order (they were previously swapped).
    # NOTE(review): faux_width / faux_height are presumably set by the
    # processImage(exists=False) call above - confirm.
    generic_handle.content = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><alto xmlns="http://www.loc.gov/standards/alto/ns-v2#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"> <Description> <MeasurementUnit>pixel</MeasurementUnit> <OCRProcessing ID="IdOcr"> <ocrProcessingStep> <processingSoftware> <softwareCreator>ABBYY</softwareCreator> <softwareName>ABBYY Recognition Server</softwareName> <softwareVersion>4.0</softwareVersion> </processingSoftware> </ocrProcessingStep> </OCRProcessing> </Description> <Styles> <ParagraphStyle ID="StyleId-FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF-" ALIGN="Left" LEFT="0" RIGHT="0" FIRSTLINE="0"/> </Styles> <Layout> <Page ID="Page1" PHYSICAL_IMG_NR="1"> <PrintSpace HEIGHT="%s" WIDTH="%s" VPOS="0" HPOS="0"/> </Page> </Layout></alto>' % (self.faux_height, self.faux_width)
    generic_handle.save()

    return self.ohandle.save()
def checksum_worker(job_package):
    '''
    Work-in-progress checksum job: after confirmation, prints the name of
    every datastream on the object and returns the object's `ds_list`.

    job_package: dict with 'form_data' (needs 'confirm_string' and
        'target_state') and 'PID'.

    Returns `ds_list` on confirmation, otherwise a skip message string.
    '''
    form_data = job_package['form_data']
    print(form_data)

    # nothing happens unless the user typed the confirmation word
    if form_data['confirm_string'] != "CONFIRM":
        return "Confirmation not entered correctly, skipping."

    target_state = form_data['target_state']
    print("Setting state to: %s" % (target_state))

    datastreams = fedora_handle.get_object(job_package['PID']).ds_list
    for ds_name, _ds_info in datastreams.items():
        print(ds_name)
        # TODO: read getDatastreamObject(ds_name).checksum_type / .checksum
        # and return datastream name + checksum results to the page

    # TODO: enable checksumming if checksums are not enabled
    return datastreams
def addDS_worker(job_package):
    '''
    Build a new datastream on an object from submitted form values and save it.

    job_package: dict with 'form_data' ('dsID', 'dsLabel', 'controlGroup',
        'MIMEType', 'dsLocation', and 'content' when nothing is uploaded),
        'PID', and optionally 'upload_data' (path of a file to read).

    Returns the result of the datastream's `save()`.
    '''
    form_data = job_package['form_data']
    print(form_data)

    target_object = fedora_handle.get_object(job_package['PID'])
    datastream = eulfedora.models.DatastreamObject(
        target_object,
        form_data['dsID'],
        form_data['dsLabel'],
        control_group=form_data['controlGroup'])

    # optional attributes are only set when the form supplied a value
    mime_type = form_data['MIMEType']
    if mime_type != '':
        datastream.mimetype = mime_type
    location = form_data['dsLocation']
    if location != '':
        datastream.ds_location = location

    # an uploaded file takes precedence over content typed into the form
    if 'upload_data' in job_package:
        with open(job_package['upload_data'], 'r') as upload:
            datastream.content = upload.read()
    elif form_data['content'] != '':
        datastream.content = form_data['content']

    return datastream.save()
def editDSRegex_regex_worker(job_package):
    '''
    Apply a regex search/replace to an object's MODS datastream and save it.

    job_package: dict with 'PID' and 'form_data' ('regex_search',
        'regex_replace').

    Returns the result of the MODS datastream's `save()`.
    '''
    PID = job_package['PID']
    target_object = fedora_handle.get_object(PID)

    # fetch the current MODS content directly over the Fedora HTTP API
    mods_url = "http://{APP_HOST}/fedora/objects/{PID}/datastreams/MODS/content".format(PID=PID, APP_HOST=localConfig.APP_HOST)
    current_xml = requests.get(mods_url).text.encode("utf-8")

    form_data = job_package['form_data']
    pattern = form_data['regex_search'].encode('utf-8')
    replacement = form_data['regex_replace'].encode('utf-8')
    updated_xml = re.sub(pattern, replacement, current_xml)

    # write the result back as the MODS datastream
    mods_ds = eulfedora.models.DatastreamObject(target_object, "MODS", "MODS", control_group="X")
    mods_ds.mimetype = "text/xml"
    mods_ds.content = updated_xml
    return mods_ds.save()
def manageOAI_toggleSet_worker(self, harvest_status, object_uri, collectionPID):
    '''
    Add or remove an object's isMemberOfOAISet relationship to a collection.

    self: task object providing `retry()` (presumably a bound Celery task).
    harvest_status: the string "False" (add the relationship) or "True"
        (purge it).
    object_uri: object URI; the PID is taken as the segment after the first "/".
    collectionPID: PID of the collection, used as the relationship's object.

    Returns the result of the add/purge call.
    Raises ValueError for any other `harvest_status`, and whatever
    `self.retry()` returns when the PID is currently locked.
    '''
    # Validate before taking the lock: previously an unexpected value only
    # surfaced as an UnboundLocalError after the PID had been locked.
    if harvest_status not in ("False", "True"):
        raise ValueError("harvest_status must be 'False' or 'True', got %r" % (harvest_status,))

    PID = object_uri.split("/")[1]

    # PID lock: if another task holds it, back off and retry later
    if redisHandles.r_PIDlock.exists(PID):
        time.sleep(.25)
        raise self.retry(countdown=3)
    # NOTE(review): the lock is not released in this function - presumably
    # cleared elsewhere once the task finishes; confirm.
    redisHandles.r_PIDlock.set(PID, 1)

    isMemberOfOAISet_predicate = "http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet"
    obj_handle = fedora_handle.get_object(object_uri)

    # toggle: not yet in the set -> add; already in the set -> purge
    if harvest_status == "False":
        print("%s was not part of set, enabling..." % (PID))
        toggle_function = obj_handle.add_relationship
    else:
        print("%s was harvestable, deactivating..." % (PID))
        toggle_function = obj_handle.purge_relationship

    object_string = "info:fedora/%s" % (collectionPID)
    return toggle_function(isMemberOfOAISet_predicate, object_string)
def DCfromMODS_worker(job_package):
    '''
    Derive an object's DC datastream from its MODS datastream.

    The MODS XML is transformed with 'inc/xsl/MODS_to_DC.xsl', duplicate
    elements are removed via `utilities.delDuplicateElements`, and the
    result is saved to the DC datastream.

    job_package: dict with 'PID'.

    Returns the result of the DC datastream's `save()`.
    '''
    PID = job_package['PID']
    ohandle = fedora_handle.get_object(PID)

    # 1) retrieve MODS
    MODS_handle = ohandle.getDatastreamObject('MODS')
    XMLroot = etree.fromstring(MODS_handle.content.serialize())

    # 2) transform MODS to DC with the stylesheet
    print("XSLT Transforming: %s" % (PID))
    # the stylesheet only needs to be open while it is parsed; `with`
    # guarantees the handle is closed (it was previously left open)
    with open('inc/xsl/MODS_to_DC.xsl', 'r') as XSLhand:
        xslt_tree = etree.parse(XSLhand)
    transform = etree.XSLT(xslt_tree)
    DC = transform(XMLroot)

    # 2.5) scrub duplicate, identical elements from DC
    DC = utilities.delDuplicateElements(DC)

    # 3) save to DC datastream
    DS_handle = ohandle.getDatastreamObject("DC")
    DS_handle.content = str(DC)
    derive_results = DS_handle.save()
    print("DCfromMODS result: %s" % (derive_results,))
    return derive_results
def augmentCore(PID):
    '''
    Fire content-model-specific augment routines for a 'wayne:' object.

    Looks up the object's hasContentModel relationships and calls
    `ebookText(PID)` for WSUebook objects and `hierarchicalDocuments(PID)`
    for Document objects. PIDs without the 'wayne:' prefix are skipped.
    '''
    print("Checking %s" % (PID,))

    if not PID.startswith("wayne:"):
        print("Does not have 'wayne' prefix, skipping augmentCore()...")
        return

    # collect the object's content models from the resource index
    risearch = fedora_handle.get_object(PID).risearch
    content_models = risearch.spo_search(
        "info:fedora/%s" % (PID),
        "info:fedora/fedora-system:def/relations-external#hasContentModel").objects()

    for content_model in content_models:
        model_uri = str(content_model)

        # ebooks
        if model_uri == "info:fedora/CM:WSUebook":
            print("Firing ebook augment")
            ebookText(PID)

        # hierarchical files
        if model_uri == "info:fedora/CM:Document":
            print("Firing hierarchical augment")
            hierarchicalDocuments(PID)

        # more advanced indexing could be added here
def __init__(self, pid=False, ds_id='PREMIS'):
    '''
    Load an object's PREMIS datastream, or start a blank PREMIS tree.

    pid: PID of the object to read from; when falsy no object is fetched.
    ds_id: id of the datastream holding PREMIS (default 'PREMIS').

    Afterwards `premis_root` and `premis_tree` are always set. `ohandle`
    and `premis_ds` stay False when there was nothing to load.
    '''
    self.pid = pid
    self.ohandle = False
    self.premis_ds = False
    self.premis_tree = None

    # if pid provided, attempt to retrieve PREMIS
    if pid:
        self.ohandle = fedora_handle.get_object(pid)
        if ds_id in self.ohandle.ds_list:
            # fetch the datastream that was actually asked for; this used to
            # be hard-coded to 'PREMIS' regardless of ds_id
            self.premis_ds = self.ohandle.getDatastreamObject(ds_id)
            self.premis_root = self.premis_ds.content.node
            self.premis_tree = self.premis_root.getroottree()
        else:
            print("%s datastream not found, initializing blank PREMIS node" % ds_id)

    # if no pre-existing PREMIS datastream, init new one
    if not self.premis_ds:
        ns = {
            "xsi": "http://www.w3.org/2001/XMLSchema-instance",
            "xsd": "http://www.w3.org/2001/XMLSchema",
            "premis": "info:lc/xmlns/premis-v2",
        }
        self.premis_root = etree.Element('premis', nsmap=ns)
        self.premis_tree = etree.ElementTree(self.premis_root)
def indexPageText(self):
    '''
    When copying objects between repositories, indexing of pages is skipped.
    This function can be run to repeat that process.

    For every page in `self.pages_from_rels`, the page object's HTML
    datastream is posted to the Solr bookreader extract handler; a commit is
    issued once at the end.

    Raises Exception("Could not index page N") on the first page that fails;
    the underlying traceback is printed first so the cause is not lost.
    '''
    # local import keeps this block self-contained
    import traceback

    # read the page mapping once instead of on every iteration
    pages = self.pages_from_rels
    total_pages = len(pages)

    for page in pages:
        try:
            print("Working on page %d / %d" % (page, total_pages))

            # index in Solr bookreader core
            data = {
                "literal.id": self.objMeta['identifier'] + "_OCR_HTML_" + str(page),
                "literal.ItemID": self.objMeta['identifier'],
                "literal.page_num": page,
                "fmap.content": "OCR_text",
                "commit": "false"
            }
            ds_handle = fedora_handle.get_object("%s_Page_%d" % (self.pid, page)).getDatastreamObject("HTML")
            files = {'file': ds_handle.content}
            requests.post("http://localhost/solr4/bookreader/update/extract", data=data, files=files)

        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate untouched
        except Exception:
            print(traceback.format_exc())
            raise Exception("Could not index page %d" % page)

    # commit
    print(solr_bookreader_handle.commit())
def removeFromDPLA_worker(job_package):
    '''
    Remove an object from the DPLA OAI set by purging its isMemberOfOAISet
    relationship to wayne:collectionDPLAOAI.

    job_package: dict with 'PID'.

    Returns the result of `purge_relationship`.
    '''
    target_object = fedora_handle.get_object(job_package['PID'])

    # purge (not add) the set-membership triple
    oai_set_predicate = "http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet"
    dpla_set_uri = "info:fedora/wayne:collectionDPLAOAI"
    return target_object.purge_relationship(oai_set_predicate, dpla_set_uri)
def constituents(self):
    '''
    Returns a list of digital objects that are constituents of this object
    (every subject with an isConstituentOf relationship to `self.pid`).
    The query requests no particular ordering.
    '''
    query = 'select $constituent WHERE {{ $constituent <info:fedora/fedora-system:def/relations-external#isConstituentOf> <info:fedora/%s> . }}' % (self.pid)
    rows = fedora_handle.risearch.sparql_query(query)

    # resolve each result row to a digital object
    return [fedora_handle.get_object(row['constituent']) for row in rows]
def manageOAI_genItemID_worker(job_package):
    '''
    Give an object its OAI itemID relationship, built from its PID.

    job_package: dict with 'PID'.

    Prints the result of `add_relationship`; returns None.
    '''
    PID = job_package['PID']
    target_object = fedora_handle.get_object(PID)

    # OAI identifier is the repository prefix followed by the PID
    OAI_identifier = "oai:digital.library.wayne.edu:%s" % (PID)
    result = target_object.add_relationship("http://www.openarchives.org/OAI/2.0/itemID", OAI_identifier)
    print(result)
def pages_from_rels(self):
    '''
    Returns OrderedDict with pageOrder (as int) as key, digital obj as val,
    in the order the query returns rows (ascending $pageOrder).
    '''
    query = 'select $page $pageOrder WHERE {{ $page <info:fedora/fedora-system:def/relations-external#isConstituentOf> <info:fedora/%s> .$page <http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageOrder> $pageOrder . }} ORDER BY ASC($pageOrder)' % (self.pid)
    rows = fedora_handle.risearch.sparql_query(query)

    pages = OrderedDict()
    for row in rows:
        page_order = int(row['pageOrder'])
        pages[page_order] = fedora_handle.get_object(row['page'])
    return pages
def ingestBag(self):
    '''
    Ingest this bag as a Fedora object.

    Creates the object named by objMeta['id'], then writes its label, POLICY
    datastream, OBJMETA datastream (objMeta as JSON), RELS-EXT relationships
    and MODS datastream (from <bag path>/data/MODS.xml), and finally hands
    off to `self.finishIngest()`.

    Returns the result of `self.finishIngest()`, or False if any step inside
    the ingest raised (the traceback is printed).
    Raises Exception if `self.object_type` is not "bag".
    '''
    if self.object_type != "bag":
        raise Exception("WSUDOR_Object instance is not 'bag' type, aborting.")

    # ingest Volume object
    try:
        self.ohandle = fedora_handle.get_object(self.objMeta['id'], create=True)
        self.ohandle.save()

        # set base properties of object
        self.ohandle.label = self.objMeta['label']

        # write POLICY datastream (NOTE: 'E' management type required, not 'R')
        print("Using policy: %s" % (self.objMeta['policy'],))
        policy_suffix = self.objMeta['policy'].split("info:fedora/")[1]
        policy_handle = eulfedora.models.DatastreamObject(self.ohandle, "POLICY", "POLICY", mimetype="text/xml", control_group="E")
        policy_handle.ds_location = "http://localhost/fedora/objects/{policy}/datastreams/POLICY_XML/content".format(policy=policy_suffix)
        policy_handle.label = "POLICY"
        policy_handle.save()

        # write objMeta as datastream
        objMeta_handle = eulfedora.models.FileDatastreamObject(self.ohandle, "OBJMETA", "Ingest Bag Object Metadata", mimetype="application/json", control_group='M')
        objMeta_handle.label = "Ingest Bag Object Metadata"
        objMeta_handle.content = json.dumps(self.objMeta)
        objMeta_handle.save()

        # write explicit RELS-EXT relationships
        for relationship in self.objMeta['object_relationships']:
            predicate = str(relationship['predicate'])
            rel_object = str(relationship['object'])
            print("Writing relationship: %s %s" % (predicate, rel_object))
            self.ohandle.add_relationship(predicate, rel_object)

        # write derived RELS-EXT
        self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isRepresentedBy", self.objMeta['isRepresentedBy'])
        content_type_string = "info:fedora/CM:" + self.objMeta['content_type'].split("_")[1]
        self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#hasContentModel", content_type_string)

        # write MODS datastream; the file is kept open only for the duration
        # of the save and then closed (it was previously never closed)
        mods_handle = eulfedora.models.FileDatastreamObject(self.ohandle, "MODS", "MODS descriptive metadata", mimetype="text/xml", control_group='M')
        mods_handle.label = "MODS descriptive metadata"
        file_path = self.Bag.path + "/data/MODS.xml"
        with open(file_path) as mods_file:
            mods_handle.content = mods_file
            mods_handle.save()

        # save and commit object before finishIngest()
        self.ohandle.save()

        # finish generic ingest
        return self.finishIngest()

    # any failure is reported on stdout and signalled to the caller as False
    except Exception as e:
        print(traceback.format_exc())
        print("Volume Ingest Error: %s" % (e,))
        return False
def editRELS_purge_worker(job_package):
    '''
    Purge one relationship (predicate / object pair) from an object.

    job_package: dict with 'PID' and 'form_data' ('predicate', 'object').

    Returns the result of `purge_relationship`.
    '''
    target_object = fedora_handle.get_object(job_package['PID'])

    # form values are utf-8 encoded and stripped of surrounding whitespace
    form_data = job_package['form_data']
    predicate = form_data['predicate'].encode('utf-8').strip()
    rel_object = form_data['object'].encode('utf-8').strip()

    print("Removing the following predicate / subject: %s /%s" % (predicate, rel_object))
    return target_object.purge_relationship(predicate, rel_object)
def editRELS_modify_worker(job_package):
    '''
    Replace one relationship on an object with an edited version.

    job_package: dict with 'PID' and 'form_data' ('old_predicate',
        'old_object', 'new_predicate', 'new_object').

    When the predicate is unchanged (or the new predicate is blank) the
    object value is swapped with `modify_relationship`. When the predicate
    itself was edited, the new triple is added and the old one purged;
    previously `new_predicate` was read but silently ignored.

    Returns the result of the last relationship call made, or False if
    adding the new triple failed (the old triple is then left in place).
    '''
    PID = job_package['PID']
    obj_ohandle = fedora_handle.get_object(PID)

    form_data = job_package['form_data']
    new_predicate_string = form_data['new_predicate'].encode('utf-8').strip()
    old_predicate_string = form_data['old_predicate'].encode('utf-8').strip()
    new_object_string = form_data['new_object'].encode('utf-8').strip()
    old_object_string = form_data['old_object'].encode('utf-8').strip()

    # same predicate: only the object changes
    if not new_predicate_string or new_predicate_string == old_predicate_string:
        return obj_ohandle.modify_relationship(old_predicate_string, old_object_string, new_object_string)

    # modify_relationship is given a single predicate, so a predicate change
    # needs add + purge. Add first so a failure never loses the old triple.
    if not obj_ohandle.add_relationship(new_predicate_string, new_object_string):
        return False
    return obj_ohandle.purge_relationship(old_predicate_string, old_object_string)
def editRELS_add_worker(job_package):
    '''
    Add one relationship to an object from submitted form values.

    job_package: dict with 'PID' and 'form_data' ('obj', plus
        'predicate_literal' when the 'literal' key is present, otherwise
        'predicate').

    Returns the result of `add_relationship`.
    '''
    target_object = fedora_handle.get_object(job_package['PID'])
    form_data = job_package['form_data']

    # the presence of a 'literal' key selects which form field holds the predicate
    predicate_field = 'predicate_literal' if "literal" in form_data else 'predicate'
    predicate = form_data[predicate_field].encode('utf-8').strip()
    rel_object = form_data['obj'].encode('utf-8').strip()

    return target_object.add_relationship(predicate, rel_object)
def index(PIDnum):
    '''
    Render the editDSMime page for the selected PID at position `PIDnum`.

    Returns the rendered "editDSMime.html" template, or the result of
    `utilities.applicationError` when `jobs.genPIDlet` returns False.
    '''
    position = int(PIDnum)

    PIDlet = jobs.genPIDlet(position)
    if PIDlet == False:
        return utilities.applicationError("PIDnum is out of range.")

    # previous / next navigation links
    PIDlet['pURL'] = "/tasks/editDSMime/" + str(position - 1)
    PIDlet['nURL'] = "/tasks/editDSMime/" + str(position + 1)

    # datastreams of the current object
    current_object = fedora_handle.get_object(PIDlet['cPID'])
    ds_list = current_object.ds_list

    return render_template(
        "editDSMime.html",
        PIDlet=PIDlet,
        PIDnum=PIDnum,
        ds_list=ds_list,
        APP_HOST=localConfig.APP_HOST)
def _createVirtBook(self):
    '''
    Generate the virtual ScannedBook object for this object.

    Target Datastreams:
        - DC - text/xml
        - MARCXML - text/xml
        - RELS-EXT - application/rdf+xml
    '''
    print("generating virtual ScannedBook object")

    book_type = WSUDOR_ContentTypes.WSUDOR_Readux_VirtualBook
    virtual_book = fedora_handle.get_object(type=book_type)
    virtual_book.create(self)
def pruneSolr_worker(job_package, PID=False):
    '''
    Delete a Solr document, either unconditionally or only if orphaned.

    job_package: dict with 'doc_id'; only read when `PID` is falsy.
    PID: when given, that key is deleted from Solr without any check.

    Returns "PRUNED" when a document was deleted, "IGNORED" when the
    object still exists in Fedora.
    '''
    # explicit PID: prune it straight away
    if PID:
        solr_handle.delete_by_key(PID)
        return "PRUNED"

    doc_id = job_package['doc_id']
    if fedora_handle.get_object(doc_id).exists:
        return "IGNORED"

    print("Did not find object in Fedora, pruning from Solr...")
    solr_handle.delete_by_key(doc_id)
    return "PRUNED"
def manageOAI_toggleSet(PID):
    '''
    Toggle a collection object's OAI harvestable state.

    Flips the isOAIHarvestable relationship between "False" and "True" and
    adds (when enabling) or purges (when disabling) the OAI setSpec and
    setName relationships.

    Returns a redirect to "/tasks/manageOAI/serverWide".
    '''
    isOAIHarvestable_predicate = "http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isOAIHarvestable"

    obj_ohandle = fedora_handle.get_object(PID)

    # current harvest status and collection title: first match of each query
    harvest_status = next(obj_ohandle.risearch.get_objects(obj_ohandle.uriref, isOAIHarvestable_predicate))
    dc_title = next(obj_ohandle.risearch.get_objects(obj_ohandle.uriref, "dc:title"))

    # flip the status flag and pick the matching add/purge operation
    if harvest_status == "False":
        print("Object was not harvestable, enabling...")
        print(obj_ohandle.modify_relationship(isOAIHarvestable_predicate, "False", "True"))
        toggle_function = obj_ohandle.add_relationship
    if harvest_status == "True":
        print("Object was harvestable, deactivating...")
        print(obj_ohandle.modify_relationship(isOAIHarvestable_predicate, "True", "False"))
        toggle_function = obj_ohandle.purge_relationship

    # setSpec and setName relationships get the same add/purge treatment
    set_relationships = (
        ("http://www.openarchives.org/OAI/2.0/setSpec", "set:%s" % (PID)),
        ("http://www.openarchives.org/OAI/2.0/setName", dc_title),
    )
    for predicate_string, object_string in set_relationships:
        print(toggle_function(predicate_string, object_string))

    # toggle relationships for child objects (runs as celery task) - disabled:
    # collection_objects = obj_ohandle.risearch.get_subjects("fedora-rels-ext:isMemberOfCollection",obj_ohandle.uriref)
    # for object_uri in collection_objects:
    #     manageOAI_toggleSet_worker.apply_async(
    #         kwargs={
    #             'harvest_status':harvest_status,
    #             'object_uri':object_uri,
    #             'PID':PID
    #         },
    #         queue=job_package['username']
    #     )

    return redirect("/tasks/manageOAI/serverWide")
def _createVirtVolume(self):
    '''
    Generate the virtual ScannedVolume object for this object.

    Target Datastreams:
        - DC - text/xml
        - OCR - text/xml
        - PDF - application/pdf
        - RELS-EXT - application/rdf+xml
    '''
    print("generating virtual ScannedVolume object")

    volume_type = WSUDOR_ContentTypes.WSUDOR_Readux_VirtualVolume
    virtual_volume = fedora_handle.get_object(type=volume_type)
    virtual_volume.create(self)
def makeSymLink(PID, DS):
    '''
    Create (if needed) a symlink to a datastream's file in the Fedora
    datastream store.

    The link lives under /var/www/wsuls/symLinks/ and is named after the
    md5 of the datastream's internal name, with an extension guessed from
    the datastream's mimetype (".sym" when no guess is possible).

    PID: object PID.
    DS: datastream id.

    Returns {'symlink': <path>} when the link exists or was created, or the
    string "Target not found. Aborting." when the source file is missing.
    '''
    returnDict = {}

    filename = "info:fedora/" + PID + "/" + DS + "/" + DS + ".0"

    # first two hex chars of the md5 name the folder inside the store
    hashed_filename = hashlib.md5(urllib.unquote(filename))
    digest = hashed_filename.hexdigest()
    dataFolder = digest[0:2]

    # peculiars for Fedora: underscores are percent-encoded as well
    filename_quoted = urllib.quote_plus(filename).replace('_', '%5F')

    # symlink directory
    path_prefix = "/var/www/wsuls/symLinks/"

    # guess file extension; any lookup failure just means "no guess"
    try:
        extension_guess = mimetypes.guess_extension(
            fedora_handle.get_object(PID).getDatastreamObject(DS).mimetype)
    except Exception:
        extension_guess = None
    # guess_extension() returns None for unknown types. The original used
    # `==` here instead of `=`, so None leaked into the path concatenation.
    if extension_guess is None:
        extension_guess = ".sym"

    # construct full symlink path
    file_path = path_prefix + digest + extension_guess
    returnDict['symlink'] = file_path

    # already linked
    if os.path.exists(file_path):
        return returnDict

    # create link if the stored file is really there
    source_prefix = "/usr/local/fedora/data/datastreamStore/"
    source_path = source_prefix + dataFolder + "/" + filename_quoted
    if os.path.exists(source_path):
        os.symlink(source_path, file_path)
        return returnDict
    return "Target not found. Aborting."
def editDSMime_worker(job_package):
    '''
    Change the mimetype of one datastream on an object.

    job_package: dict with 'PID' and 'form_data' ('DSID', 'mimetype').

    Returns the result of the datastream's `save()`, or the string
    "Could not edit Datastream Mime-Type" if anything failed (the traceback
    is printed so the cause is not lost).
    '''
    # local import keeps this block self-contained
    import traceback

    form_data = job_package['form_data']
    print(form_data)

    try:
        PID = job_package['PID']
        obj_ohandle = fedora_handle.get_object(PID)

        # update mimetype
        ds_handle = obj_ohandle.getDatastreamObject(form_data['DSID'].encode('utf-8'))
        ds_handle.mimetype = form_data['mimetype'].encode('utf-8')

        return ds_handle.save()

    # `except Exception` (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate instead of being reported as a failed edit
    except Exception:
        print(traceback.format_exc())
        return "Could not edit Datastream Mime-Type"
def MODSimport_worker(job_package):
    '''
    Receive job_package, which contains PID, update MODS.

    job_package: dict with 'PID' and 'MODS' (path of a temporary file
        holding the new MODS XML; the file is deleted after reading).

    Returns the result of the MODS datastream's `save()`.
    '''
    PID = job_package['PID']
    MODS = job_package['MODS']
    print("Updating MODS for %s" % (PID))

    # read the temp MODS file; `with` closes it even if the read fails
    with open(MODS, 'r') as fhand:
        MODS_string = fhand.read()

    # Remove the temp file without going through a shell, so the path is
    # never interpreted as shell text. As before, a failed delete does not
    # stop the update.
    try:
        os.remove(MODS)
    except OSError as e:
        print("Could not remove temp MODS file %s: %s" % (MODS, e))

    obj_handle = fedora_handle.get_object(PID)
    ds_handle = obj_handle.getDatastreamObject("MODS")
    ds_handle.content = MODS_string
    return ds_handle.save()
def objectState_worker(job_package):
    '''
    Set an object's state after the user has typed the confirmation word.

    job_package: dict with 'form_data' ('confirm_string', 'target_state')
        and 'PID'.

    Returns the result of the object's `save()`, or a skip message string
    when the confirmation does not match.
    '''
    form_data = job_package['form_data']
    print(form_data)

    # nothing happens unless the user typed the confirmation word
    if form_data['confirm_string'] != "CONFIRM":
        return "Confirmation not entered correctly, skipping."

    new_state = form_data['target_state']
    print("Setting state to: {target_state}".format(target_state=new_state))

    # apply the state to the object and persist it
    target_object = fedora_handle.get_object(job_package['PID'])
    target_object.state = new_state
    return target_object.save()
def index():
    '''
    Render the purgeDS page for one of the selected PIDs.

    The PID is chosen by the "PIDnum" request argument (default 0) as an
    index into `getSelPIDs()`; the template receives the ids of that
    object's datastreams.
    '''
    # which selected PID to examine, if noted
    requested = request.args.get("PIDnum")
    PIDnum = int(requested) if requested is not None else 0

    PIDs = getSelPIDs()
    current_pid = PIDs[PIDnum]
    print(current_pid)

    # datastream ids of the current object
    datastreams = fedora_handle.get_object(current_pid).ds_list
    dsIDs = [name for (name, _loc) in datastreams.items()]
    print(dsIDs)

    form = purgeDSForm()
    return render_template("purgeDS.html", form=form, PID=current_pid, dsIDs=dsIDs, PIDnum=PIDnum)
def DCfromMODS_single(PID):
    '''
    Derive a single object's DC datastream from its MODS datastream.

    The MODS XML is transformed with 'inc/xsl/MODS_to_DC.xsl' and the
    result is saved to the DC datastream.

    PID: PID of the object to update.

    Returns the result of the DC datastream's `save()`.
    '''
    ohandle = fedora_handle.get_object(PID)

    # 1) retrieve MODS
    MODS_handle = ohandle.getDatastreamObject('MODS')
    XMLroot = etree.fromstring(MODS_handle.content.serialize())

    # 2) transform MODS to DC with the stylesheet
    print("XSLT Transforming: {PID}".format(PID=PID))
    # the stylesheet only needs to be open while it is parsed; `with`
    # guarantees the handle is closed (it was previously left open)
    with open('inc/xsl/MODS_to_DC.xsl', 'r') as XSLhand:
        xslt_tree = etree.parse(XSLhand)
    transform = etree.XSLT(xslt_tree)
    DC = transform(XMLroot)

    # 3) save to DC datastream
    DS_handle = ohandle.getDatastreamObject("DC")
    DS_handle.content = str(DC)
    derive_results = DS_handle.save()
    print("DCfromMODS result: %s" % (derive_results,))
    return derive_results
def editRELS_regex_worker(job_package):
    '''
    Apply a regex search/replace to an object's RELS-EXT datastream and
    save it.

    job_package: dict with 'PID' and 'form_data' ('regex_search',
        'regex_replace').

    Prints the result of the datastream's `save()`; returns None.
    '''
    PID = job_package['PID']
    target_object = fedora_handle.get_object(PID)

    # RELS-EXT is fetched raw over the Fedora HTTP API rather than through
    # eulfedora's rels_ext.content.serialize()
    rels_url = "http://localhost/fedora/objects/%s/datastreams/RELS-EXT/content" % (PID)
    current_xml = requests.get(rels_url).text.encode("utf-8")

    form_data = job_package['form_data']
    pattern = form_data['regex_search'].encode('utf-8')
    replacement = form_data['regex_replace'].encode('utf-8')
    updated_xml = re.sub(pattern, replacement, current_xml)

    # write the result back as the RELS-EXT datastream
    rels_ds = eulfedora.models.DatastreamObject(target_object, "RELS-EXT", "RELS-EXT", control_group="X")
    rels_ds.mimetype = "application/rdf+xml"
    rels_ds.content = updated_xml
    print(rels_ds.save())