def regexConfirm():
    """
    Preview a user-supplied regex search/replace against a raw XML string,
    flagging in the return package whether the substitution changed anything.
    """
    # selected PIDs (fetched for parity with sibling views; unused below)
    selected_pids = jobs.getSelPIDs()
    submitted = request.form

    # source string and utf-8 encoded search / replace patterns
    source_xml = request.form['raw_xml']
    pattern = request.form['regex_search'].encode('utf-8')
    replacement = request.form['regex_replace'].encode('utf-8')

    # apply the substitution
    rewritten_xml = re.sub(pattern, replacement, source_xml)

    # debug
    return_package = {
        "orig_string": source_xml,
        "new_string": rewritten_xml,
        "regex_search": pattern,
        "regex_replace": replacement
    }

    # check diff - if ratio == 100, XML is identical, simply reordered by RDF query
    if source_xml == rewritten_xml:
        return_package['string_match'] = True

    return render_template("editDSRegex_regexConfirm.html", return_package=return_package)
def updateSolr(update_type):
    """
    Fire real or emulated Solr index events for the requested update_type:

        fullIndex                     -- reindex all objects
        timestamp                     -- index objects changed since last run
        userObjects                   -- index only the user's selected PIDs
        purgeAndFullIndex             -- wipe the 'fedobjs' core, then full reindex (SLOW)
        replicateStagingToProduction  -- purge production core, replicate from staging (FAST)

    Returns JSON (job metadata) when called with APIcall=True, otherwise
    renders the updateSolr template.
    """
    # celery AsyncResult from solrIndexer; stays None when no branch fires,
    # so the API response reports job_ID: null instead of raising NameError
    index_handle = None

    # real or emulated solr events
    if update_type == "fullIndex":
        index_handle = solrIndexer.delay('fullIndex', '')

    elif update_type == "timestamp":
        print("Updating by timestamp")
        index_handle = solrIndexer.delay('timestampIndex', '')

    elif update_type == "userObjects":
        print("Updating by userObjects")
        PIDs = jobs.getSelPIDs()
        for PID in PIDs:
            index_handle = solrIndexer.delay('modifyObject', PID)

    # purge and reindex staging solr core from fedora (SLOW)
    elif update_type == "purgeAndFullIndex":
        print("Purging solr core and reindexing all objects")
        # delete all from /fedobjs core -- only runs when the handle points
        # at the 'fedobjs' core, guarding other cores from a wipe
        if 'fedobjs' in solr_handle.base_url:
            solr_handle.delete_by_query('*:*', commit=False)
        # run full index
        index_handle = solrIndexer.delay('fullIndex', '')

    # purge production core, replicate from staging (FAST)
    elif update_type == "replicateStagingToProduction":
        index_handle = solrIndexer.delay('replicateStagingToProduction', '')

    # return logic
    if "APIcall" in request.values and request.values['APIcall'] == "True":
        # prepare package (job_ID is None when update_type matched no branch)
        return_dict = {
            "solrIndexer": {
                "update_type": update_type,
                "timestamp": datetime.datetime.now().isoformat(),
                "job_ID": index_handle.id if index_handle is not None else None
            }
        }
        # return JSON
        print(return_dict)
        json_string = json.dumps(return_dict)
        resp = make_response(json_string)
        resp.headers['Content-Type'] = 'application/json'
        return resp
    else:
        return render_template("updateSolr.html", update_type=update_type, APP_HOST=localConfig.APP_HOST)
def index():
    """
    Render the purge-datastream form for one of the currently selected
    objects, chosen by the optional ?PIDnum= query arg (defaults to 0).
    """
    # get PID to examine, if noted
    if request.args.get("PIDnum") is not None:
        PIDnum = int(request.args.get("PIDnum"))
    else:
        PIDnum = 0

    # get PIDs
    PIDs = getSelPIDs()
    print(PIDs[PIDnum])

    # ds_list maps datastream id -> datastream info; only the ids are needed
    obj_ohandle = fedora_handle.get_object(PIDs[PIDnum])
    dsIDs = list(obj_ohandle.ds_list.keys())
    print(dsIDs)

    form = purgeDSForm()
    return render_template("purgeDS.html", form=form, PID=PIDs[PIDnum], dsIDs=dsIDs, PIDnum=PIDnum)
def viewManifests(PIDnum):
    """
    Display the Redis-cached IIIF manifest for the PIDnum-th selected
    object, with previous/next navigation URLs. Falls back to a JSON
    status stub when no manifest is cached.
    """
    # get PIDs
    PIDs = getSelPIDs()

    # GET CURRENT OBJECTS
    PIDlet = genPIDlet(int(PIDnum))
    if PIDlet is False:
        return utilities.applicationError("PIDnum is out of range or invalid. We are displeased.")
    PIDlet['pURL'] = "/tasks/iiifManifest/viewManifests/" + str(int(PIDnum) - 1)
    PIDlet['nURL'] = "/tasks/iiifManifest/viewManifests/" + str(int(PIDnum) + 1)

    # check Redis for manifest
    r_response = redisHandles.r_iiif.get(PIDlet['cPID'])
    if r_response is not None:
        print("Redis hit for manifest.")
        json_return = r_response
    else:
        print("Redis miss for manifest.")
        json_return = json.dumps({"status": "manifest for %s not found in redis" % PIDlet['cPID']})

    # loads/dumps round trip pretty-prints the manifest for display
    return render_template(
        "iiifManifest_view.html",
        PIDnum=(int(PIDnum) + 1),
        PIDlet=PIDlet,
        json_return=json.dumps(json.loads(json_return), indent=2),
        iiif_manifest_prefix=localConfig.IIIF_MANIFEST_PREFIX,
        APP_HOST=localConfig.APP_HOST
    )
def MODSexport_export():
    """
    Concatenate the MODS datastreams of all selected objects into a single
    <mods:modsCollection> document and return it as a file download.

    Each record is force-stamped with its Fedora PID in
    <mods:extension><PID>, creating elements as needed -- without the PID,
    records cannot be re-associated on reingest.
    """
    # get username
    username = session['username']

    # register namespaces (bare URI only; the 'mods' prefix is the first argument,
    # previously the URI was wrongly given as 'mods:http://...')
    etree.register_namespace('mods', 'http://www.loc.gov/mods/v3')

    # collect MODS records for selected objects
    PIDs = jobs.getSelPIDs()

    export_path = '/tmp/Ouroboros/%s_MODS_concat.xml' % (username)
    with open(export_path, 'w') as outfile:

        # write header
        outfile.write('<?xml version="1.0" encoding="UTF-8"?><mods:modsCollection xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mods="http://www.loc.gov/mods/v3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd">\n'),

        for PID in PIDs:

            # get MODS ds
            obj_ohandle = fedora_handle.get_object(PID)
            ds_handle = obj_ohandle.getDatastreamObject('MODS')

            # get MODS string
            MODS_string = unicode(ds_handle.content.serialize(), 'utf-8')

            # does <PID> element already exist?
            PID_check = ds_handle.content.node.xpath('//mods:extension/PID', namespaces=ds_handle.content.node.nsmap)

            # if not, continue with checks
            if len(PID_check) == 0:
                # check for <mods:extension>, if not present add
                extension_check = ds_handle.content.node.xpath('//mods:extension', namespaces=ds_handle.content.node.nsmap)
                # if absent, create with <PID> subelement via string replace
                if len(extension_check) == 0:
                    MODS_content = MODS_string.replace(
                        "</mods:mods>",
                        "<mods:extension><PID>%s</PID></mods:extension></mods:mods>" % PID)
                # <mods:extension> present, but no PID subelement: create it, then
                # re-serialize so the new element actually reaches the output
                # (previously the pre-mutation string was written, dropping the PID)
                else:
                    PID_elem = etree.SubElement(extension_check[0], "PID")
                    PID_elem.text = PID
                    MODS_content = unicode(ds_handle.content.serialize(), 'utf-8')
            # <PID> present: overwrite with current PID and re-serialize
            else:
                PID_element = PID_check[0]
                PID_element.text = PID
                MODS_content = unicode(ds_handle.content.serialize(), 'utf-8')

            # write to file
            outfile.write(MODS_content.encode('utf-8'))

        # close MODS collection
        outfile.write('\n</mods:modsCollection>')

    # open file from tmp and return as download
    with open(export_path, 'r') as fhand:
        response = make_response(fhand.read())
    response.headers["Content-Disposition"] = "attachment; filename=MODS_export.xml"
    return response
def updateSolr(update_type):
    """
    Fire real or emulated Solr index events for the requested update_type:

        fullIndex          -- reindex everything (requires confirmation)
        timestamp          -- index objects changed since last run
        userObjects        -- index only the user's selected PIDs
        purgeAndFullIndex  -- wipe the 'fedobjs' core, then full reindex (requires confirmation)

    Destructive actions show an interstitial confirm page first. Returns
    JSON (job metadata) when called with APIcall=True, otherwise renders
    the updateSolr template.
    """

    def _confirmed():
        # interstitial form submitted with choice == "confirm" AND the user
        # typed 'confirm' (case-insensitive) in the confirm box
        return ("choice" in request.form
                and request.form['choice'] == "confirm"
                and request.form['confirm_string'].lower() == 'confirm')

    # celery AsyncResult from solrIndexer; stays None when no job fires,
    # so the API response reports job_ID: null instead of raising NameError
    index_handle = None

    # real or emulated solr events
    if update_type == "fullIndex":
        if 'choice' not in request.form:
            return render_template('confirm.html', update_type=update_type)
        # fire only with confirmation
        if _confirmed():
            index_handle = solrIndexer.delay('fullIndex', None)
        else:
            print('skipping fullIndex')
            return redirect('/tasks/updateSolr/select')

    elif update_type == "timestamp":
        print("Updating by timestamp")
        index_handle = solrIndexer.delay('timestampIndex', None)

    elif update_type == "userObjects":
        print("Updating by userObjects")
        PIDs = jobs.getSelPIDs()
        for PID in PIDs:
            index_handle = solrIndexer.delay('modifyObject', PID)

    # purge and reindex fedobjs (SLOW)
    elif update_type == "purgeAndFullIndex":
        if 'choice' not in request.form:
            return render_template('confirm.html', update_type=update_type)
        # fire only with confirmation
        if _confirmed():
            print("Purging solr core and reindexing all objects")
            # delete all from /fedobjs core -- only runs when the handle
            # points at the 'fedobjs' core, guarding other cores from a wipe
            if 'fedobjs' in solr_handle.base_url:
                solr_handle.delete_by_query('*:*', commit=False)
            # run full index
            index_handle = solrIndexer.delay('fullIndex', None)
        else:
            print('skipping purge and index')
            return redirect('/tasks/updateSolr/select')

    # return logic
    if "APIcall" in request.values and request.values['APIcall'] == "True":
        # prepare package (job_ID is None when update_type matched no branch)
        return_dict = {
            "solrIndexer": {
                "update_type": update_type,
                "timestamp": datetime.datetime.now().isoformat(),
                "job_ID": index_handle.id if index_handle is not None else None
            }
        }
        # return JSON
        print(return_dict)
        json_string = json.dumps(return_dict)
        resp = make_response(json_string)
        resp.headers['Content-Type'] = 'application/json'
        return resp
    else:
        return render_template("updateSolr.html", update_type=update_type, APP_HOST=localConfig.APP_HOST)
def index():
    """Render the IIIF manifest task landing page."""
    # NOTE(review): the selected PIDs are unused here; the call is retained
    # in case getSelPIDs() has side effects -- confirm and drop if not
    getSelPIDs()
    return render_template("iiifManifest.html")
def index():
    """Render the export-object task landing page."""
    # NOTE(review): the selected PIDs are unused here; the call is retained
    # in case getSelPIDs() has side effects -- confirm and drop if not
    getSelPIDs()
    return render_template("exportObject.html")
def objectRelated():
    """
    Find the OAI set relationships (setSpec / setName) shared by ALL
    currently selected objects, querying the Fedora resource index in
    chunks of 100 PIDs (larger POST queries break the sparql endpoint),
    and render them alongside the currently defined OAI sets.
    """
    from WSUDOR_Manager import forms

    # Query to see what will show up in REPOX:
    # SELECT rcItem.identifier AS pid FROM rcItem
    # INNER JOIN rcRecord ON rcItem.itemKey = rcRecord.itemKey
    # INNER JOIN rcMembership ON rcMembership.recordKey = rcRecord.recordKey
    # INNER JOIN rcSet ON rcSet.setKey = rcMembership.setKey
    # INNER JOIN rcFormat ON rcFormat.formatKey = rcRecord.formatKey
    # WHERE rcFormat.mdPrefix = 'mods' AND rcSet.setSpec = 'set:wayne:collectionDPLAOAI';

    # get PIDs
    PIDs = getSelPIDs()

    # shared_relationships (in this instance, the PID of collection objects these assert membership to)
    shared_relationships = []

    # function for shared query between whole and chunked queries
    def risearchQuery(list_of_PIDs):
        # construct where statement for query
        where_statement = ""
        for PID in list_of_PIDs:
            if PID is not None:  # grouper() pads the final chunk with None
                where_statement += "<fedora:{PID}> <http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet> $object . $object <http://www.openarchives.org/OAI/2.0/setSpec> $setSpec . $object <http://www.openarchives.org/OAI/2.0/setName> $setName .".format(PID=PID)
        query_statement = "select $object $setSpec $setName from <#ri> where {{ {where_statement} }}".format(where_statement=where_statement)
        base_URL = "http://{FEDORA_USER}:{FEDORA_PASSWORD}@localhost/fedora/risearch".format(FEDORA_USER=FEDORA_USER, FEDORA_PASSWORD=FEDORA_PASSWORD)
        payload = {
            "lang": "sparql",
            "query": query_statement,
            "flush": "false",
            "type": "tuples",
            "format": "JSON"
        }
        r = requests.post(base_URL, auth=HTTPBasicAuth(FEDORA_USER, FEDORA_PASSWORD), data=payload)
        risearch = json.loads(r.text)
        return risearch

    # if more than 100 PIDs, chunk into sub-queries
    if len(PIDs) > 100:
        def grouper(iterable, chunksize, fillvalue=None):
            from itertools import izip_longest
            args = [iter(iterable)] * chunksize
            return izip_longest(*args, fillvalue=fillvalue)

        # intersection accumulator; None until the first chunk seeds it
        # (replaces the previous bare try/except around set.intersection)
        curr_set = None
        chunks = grouper(PIDs, 100)
        for chunk in chunks:
            # perform query
            risearch = risearchQuery(chunk)
            chunk_list = [(each['object'].split("/")[1], each['setSpec'], each['setName'])
                          for each in risearch['results']]
            if curr_set is None:
                curr_set = set(chunk_list)
            else:
                curr_set = set.intersection(curr_set, set(chunk_list))
            print(curr_set)
        shared_relationships = curr_set
    else:
        # perform query
        risearch = risearchQuery(PIDs)
        shared_relationships = [(each['object'].split("/")[1], each['setSpec'], each['setName'])
                                for each in risearch['results']]
        print(shared_relationships)

    # finally, find all currently available / defined sets
    form = forms.OAI_sets()
    active_sets = utilities.returnOAISets('dropdown')
    total_sets = len(active_sets)

    # (a second, unreachable render_template call after this return was removed)
    return render_template("manageOAI_objectRelated.html", shared_relationships=shared_relationships, form=form, active_sets=active_sets, total_sets=total_sets)
def editRELS_shared():
    '''
    Will return only RDF statements shared (predicate AND object) by all PIDs
        - Requires workaround for large queries...
        - Eulfedora (uses GET, too small)
        - POST requests 100+ break sparql
        - Solution: for scenarios with 100+ PIDs, break into smaller queries, then mix together in results
    '''
    # get PIDs
    PIDs = jobs.getSelPIDs()

    # shared relationships
    shared_relationships = []

    # shared function for whole or chunked query
    def risearchQuery(list_of_PIDs):
        # construct where statement for query
        where_statement = ""
        for PID in list_of_PIDs:
            if PID is not None:  # grouper() pads the final chunk with None
                where_statement += "<fedora:%s> $predicate $object . " % (PID)
        query_statement = "select $predicate $object from <#ri> where {{ %s }}" % (where_statement)
        base_URL = "http://%s:%s@localhost/fedora/risearch" % (FEDORA_USER, FEDORA_PASSWORD)
        payload = {
            "lang": "sparql",
            "query": query_statement,
            "flush": "false",
            "type": "tuples",
            "format": "JSON"
        }
        r = requests.post(base_URL, auth=HTTPBasicAuth(FEDORA_USER, FEDORA_PASSWORD), data=payload)
        risearch = json.loads(r.text)
        return risearch

    # if more than 100 PIDs, chunk into sub-queries
    if len(PIDs) > 100:
        def grouper(iterable, chunksize, fillvalue=None):
            from itertools import izip_longest
            args = [iter(iterable)] * chunksize
            return izip_longest(*args, fillvalue=fillvalue)

        # intersection accumulator; None until the first chunk seeds it
        # (replaces the previous bare try/except around set.intersection)
        curr_set = None
        chunks = grouper(PIDs, 100)
        for chunk in chunks:
            # perform query
            risearch = risearchQuery(chunk)
            chunk_list = [(each['predicate'], each['object']) for each in risearch['results']]
            if curr_set is None:
                curr_set = set(chunk_list)
            else:
                curr_set = set.intersection(curr_set, set(chunk_list))
            print(curr_set)
        shared_relationships = curr_set
    else:
        # perform query
        risearch = risearchQuery(PIDs)
        shared_relationships = [(each['predicate'], each['object']) for each in risearch['results']]

    return render_template('editRELS_shared.html', shared_relationships=shared_relationships)