def collectionsOverview():
    '''
    Render the collections overview page.

    Asks the Fedora resource index for every subject whose content model is
    info:fedora/CM:Collection, then queries Solr (rows=0, stats over
    obj_size_i) for the members of each collection.  For every collection
    with at least one member and a non-None obj_size_i stats entry, the tuple
    (size sum, utilities.sizeof_fmt(size sum), member count) is stored under
    the collection identifier.  The resulting dict is handed to
    collectionsOverview.html as a JSON string in
    object_package['coll_size_dict'].
    '''
    # get username from session
    # NOTE(review): the value is unused; the lookup is kept so a session
    # without 'username' presumably still fails here as before -- confirm.
    username = session['username']

    # get objects
    # NOTE(review): the result is unused; the call is kept in case
    # getSelPIDs() has side effects -- confirm and drop if not.
    PIDs = jobs.getSelPIDs()

    object_package = {}

    # get collection objects (the original author flagged this lookup as
    # something that can be improved)
    riquery = fedora_handle.risearch.get_subjects(
        predicate="info:fedora/fedora-system:def/relations-external#hasContentModel",
        object="info:fedora/CM:Collection"
    )
    collections = list(riquery)

    # assemble sizes
    coll_size_dict = {}
    for collection in collections:
        print("Working on %s" % (collection,))
        results = solr_handle.search(**{
            # colons are escaped so the identifier is not read as field syntax
            "q": "rels_isMemberOfCollection:" + collection.replace(":", "\\:"),
            "stats": "true",
            "stats.field": "obj_size_i",
            "rows": 0
        })

        # the None check has to come before any attribute access; the
        # original printed results.stats first and would raise on None
        if results is None:
            continue
        print(results.stats)

        if results.total_results > 0 and results.stats['obj_size_i'] is not None:
            collection_obj_sum = results.stats['obj_size_i']['sum']
            coll_size_dict[collection] = (
                collection_obj_sum,
                utilities.sizeof_fmt(collection_obj_sum),
                results.total_results
            )

    # the template receives the sizes as a JSON string, not a dict
    object_package['coll_size_dict'] = json.dumps(coll_size_dict)
    return render_template("collectionsOverview.html", object_package=object_package)
def search(self):
    '''
    Run a Solr search built from this object's q, rows, start, sort and fl
    attributes and return whatever solr_handle.search returns.
    '''
    return solr_handle.search(
        q=self.q,
        rows=self.rows,
        start=self.start,
        sort=self.sort,
        fl=self.fl
    )
def search(self):
    '''
    Query Solr with the parameters stored on this object.

    The attributes q, rows, start, sort and fl are forwarded under the same
    names as keyword arguments; the response from solr_handle.search is
    returned unchanged.
    '''
    # attribute names double as the Solr parameter names
    param_names = ("q", "rows", "start", "sort", "fl")
    request_args = dict((name, getattr(self, name)) for name in param_names)
    return solr_handle.search(**request_args)
def getLink(self, doc_id):
    '''
    Load the Solr document with the given id onto this object.

    Sets self.id to doc_id, searches Solr for "id:<doc_id>" (first row only),
    stores the first hit as self.doc and the result of
    dateutil.parser.parse(self.doc['last_modified']) as self.date_parsed.

    Returns:
        False if any step raises (for example no hit, or a missing or
        unparseable 'last_modified'); None, implicitly, on success.  Attributes
        assigned before the failing step keep their new values.
    '''
    try:
        self.id = doc_id

        # NOTE(review): doc_id is interpolated into the query unescaped --
        # confirm ids never contain Solr query syntax characters.
        solr_params = {
            "q": "id:{doc_id}".format(doc_id=doc_id),
            "start": 0,
            "rows": 1
        }
        search_results = solr_handle.search(**solr_params)

        self.doc = search_results.documents[0]
        self.date_parsed = dateutil.parser.parse(self.doc['last_modified'])
    except Exception:
        # Exception instead of a bare except: failures are still reported as
        # False, but KeyboardInterrupt / SystemExit are no longer swallowed
        return False
def getLink(self, doc_id):
    '''
    Fetch one Solr document by id and cache it on this object.

    Sets self.id to doc_id, runs the query "id:<doc_id>" with start=0 and
    rows=1, keeps the first returned document as self.doc and parses its
    'last_modified' value with dateutil.parser.parse into self.date_parsed.

    Returns:
        False when any of those steps raises; otherwise falls off the end and
        returns None.  Attributes set before the failure are left as assigned.
    '''
    try:
        self.id = doc_id

        # NOTE(review): doc_id goes into the query string as-is -- verify that
        # callers never pass ids containing Solr query syntax characters.
        solr_params = {
            "q": "id:{doc_id}".format(doc_id=doc_id),
            "start": 0,
            "rows": 1
        }
        search_results = solr_handle.search(**solr_params)

        self.doc = search_results.documents[0]
        self.date_parsed = dateutil.parser.parse(self.doc['last_modified'])
    except Exception:
        # narrowed from a bare except so that KeyboardInterrupt and SystemExit
        # propagate; ordinary errors still yield False
        return False
def total_record_count(self):
    '''Return the total_results of a match-all ("*:*") Solr query.'''
    # rows=0: no documents are requested, only the hit count is read
    match_all = solr_handle.search(q="*:*", rows=0)
    return match_all.total_results
def PIDSolr():
    '''
    Solr search page for selecting PIDs.

    Always populates the form's collection and content-model choices from
    Solr.  On POST the search is run (up to 100000 rows) and the hits are
    passed to PIDSolr.html as a JSON array of [PID, dc_title] pairs; any
    other method renders the form alone.

    Current Approach:
    If POST, send results as large array to template, save as JS variable
        - works great so far at 800+ items, but what about 100,000+?
        - documentation says ~ 50,000 is the limit
        - will need to think of a server-side option
    '''
    # get username from session
    username = session['username']

    # get form
    form = forms.solrSearch(request.form)

    # collection selection
    coll_query = {
        'q': "rels_hasContentModel:*Collection",
        'fl': ["id", "dc_title"],
        'rows': 1000
    }
    coll_results = solr_handle.search(**coll_query)
    coll_docs = coll_results.documents

    # check for title, give generic if not present
    for each in coll_docs:
        if 'dc_title' not in each:
            each['dc_title'] = ['Unknown Collection Title' + each['id'].encode('ascii', 'ignore')]

    form.collection_object.choices = [
        (each['id'].encode('ascii', 'ignore'), each['dc_title'][0].encode('ascii', 'ignore'))
        for each in coll_docs
    ]
    form.collection_object.choices.insert(0, ("", "All Collections"))

    # content model: one choice per facet value, labelled with the part after
    # the last ':'
    cm_query = {
        'q': '*',
        'facet': 'true',
        'facet.field': 'rels_hasContentModel'
    }
    cm_results = solr_handle.search(**cm_query)
    form.content_model.choices = [
        (each, each.split(":")[-1])
        for each in cm_results.facets['facet_fields']['rels_hasContentModel']
    ]
    form.content_model.choices.insert(0, ("", "All Content Types"))

    # anything but POST: render the search form without results
    if request.method != 'POST':
        return render_template("PIDSolr.html", username=username, form=form,
                               coll_docs=coll_docs, APP_HOST=localConfig.APP_HOST)

    # perform search
    # build base with native Solr queries
    query = {
        'q': form.q.data,
        'fq': [form.fq.data],
        'fl': [form.fl.data],
        'rows': 100000
    }

    # Fedora RELS-EXT
    # collection selection
    if form.collection_object.data:
        print("Collection refinement: %s" % (form.collection_object.data,))
        escaped_coll = form.collection_object.data.replace(":", "\\:")
        query['fq'].append("rels_isMemberOfCollection:info\\:fedora/" + escaped_coll)

    # content model / type selection
    if form.content_model.data:
        print("Content Model refinement: %s" % (form.content_model.data,))
        escaped_cm = form.content_model.data.replace(":", "\\:")
        query['fq'].append("rels_hasContentModel:" + escaped_cm)

    # issue query
    print(query)
    stime = time.time()
    q_results = solr_handle.search(**query)
    ttime = (time.time() - stime) * 1000
    print("Solr Query took: %s ms" % (ttime,))

    # reduce each hit to a [PID, title] row for DataTables
    data = []
    stime = time.time()
    for each in q_results.documents:
        try:
            PID = each['id'].encode('ascii', 'ignore')
            dc_title = each['dc_title'][0].encode('ascii', 'ignore')
            data.append([PID, dc_title])
        except Exception:
            # best effort: skip hits that cannot be rendered.  .get() is used
            # because a hit without 'id' would otherwise raise again in here.
            print("Could not render: %s" % (each.get('id'),))  # unicode solr id
    ttime = (time.time() - stime) * 1000
    print("Solr Munging for DataTables took:: %s ms" % (ttime,))

    json_output = json.dumps(data)

    return render_template("PIDSolr.html", username=username, form=form,
                           q_results=q_results, json_output=json_output,
                           coll_docs=coll_docs, APP_HOST=localConfig.APP_HOST)
def objPreview(PIDnum):
    '''
    Render objPreview.html for the object at position PIDnum.

    jobs.genPIDlet(PIDnum) supplies the PIDlet; its 'cPID' entry is the PID
    being previewed.  The following are gathered into object_package for the
    template:
        solr_package        first Solr hit for the PID
        components_package  subjects that are isMemberOf this object (key
                            omitted when there are none)
        rdf_package         sorted (predicate, object) pairs for the object
        datastream_package  datastream list from the WSUDOR object handle
        size_dict(_json)    objSizeDict from the handle, raw and as JSON
        OAI_package         OAI item ID (if found) and OAI set memberships

    Returns the utilities.applicationError page when PIDnum is not an
    integer, when genPIDlet reports failure, or when Solr has no document for
    the PID.
    '''
    object_package = {}

    # a non-numeric PIDnum gets the same error page as an out-of-range one
    # instead of an uncaught ValueError
    try:
        pid_index = int(PIDnum)
    except (TypeError, ValueError):
        return utilities.applicationError("PIDnum is out of range or invalid. Object-at-a-Glance is displeased.")

    # GET CURRENT OBJECTS
    PIDlet = jobs.genPIDlet(pid_index)
    if PIDlet == False:
        return utilities.applicationError("PIDnum is out of range or invalid. Object-at-a-Glance is displeased.")
    # previous / next links
    PIDlet['pURL'] = "/objPreview/" + str(pid_index - 1)
    PIDlet['nURL'] = "/objPreview/" + str(pid_index + 1)

    fedora_uri = "info:fedora/" + PIDlet['cPID']

    # WSUDOR handle
    obj_handle = WSUDOR_ContentTypes.WSUDOR_Object(PIDlet['cPID'])

    # General Metadata
    solr_params = {'q': utilities.escapeSolrArg(PIDlet['cPID']), 'rows': 1}
    solr_results = solr_handle.search(**solr_params)
    if not solr_results.documents:
        # nothing came back from Solr for this PID; documents[0] would raise IndexError
        return utilities.applicationError("No Solr document found for " + PIDlet['cPID'] + ". Object-at-a-Glance is displeased.")
    object_package['solr_package'] = solr_results.documents[0]

    # COMPONENTS
    riquery = fedora_handle.risearch.spo_search(
        subject=None,
        predicate="info:fedora/fedora-system:def/relations-external#isMemberOf",
        object=fedora_uri
    )
    components = [s.encode('utf-8') for s, p, o in riquery]
    if components:
        # the key is only present when the object has components
        object_package['components_package'] = components

    # RDF RELATIONSHIPS
    riquery = fedora_handle.risearch.spo_search(subject=fedora_uri, predicate=None, object=None)
    object_package['rdf_package'] = sorted((p, o) for s, p, o in riquery)

    # DATASTREAMS
    object_package['datastream_package'] = obj_handle.ohandle.ds_list

    # Object size of datastreams
    size_dict = obj_handle.objSizeDict
    object_package['size_dict'] = size_dict
    object_package['size_dict_json'] = json.dumps(size_dict)

    # OAI
    OAI_dict = {}

    # identifier
    try:
        riquery = fedora_handle.risearch.spo_search(
            subject=fedora_uri,
            predicate="http://www.openarchives.org/OAI/2.0/itemID",
            object=None
        )
        # next() raises StopIteration when there is no itemID triple
        OAI_dict['ID'] = next(riquery.objects()).encode('utf-8')
    except Exception:
        # best effort; Exception (not bare) so interpreter-exit signals pass through
        print("No OAI Identifier found.")

    # sets
    OAI_dict['sets'] = []
    try:
        riquery = fedora_handle.risearch.spo_search(
            subject=fedora_uri,
            predicate="http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet",
            object=None
        )
        for each in riquery.objects():
            OAI_dict['sets'].append(each)
    except Exception:
        print("No OAI sets found.")

    object_package['OAI_package'] = OAI_dict
    print(object_package['OAI_package'])

    # RENDER
    return render_template("objPreview.html", PIDnum=(pid_index + 1), PIDlet=PIDlet,
                           object_package=object_package, localConfig=localConfig)
def PIDSolr():
    '''
    Solr search page for selecting PIDs.

    Always populates the form's collection and content-model choices from
    Solr.  On POST the search is run (up to 100000 rows) and the hits are
    passed to PIDSolr.html as a JSON array of [PID, dc_title] pairs; any
    other method renders the form alone.

    Current Approach:
    If POST, send results as large array to template, save as JS variable
        - works great so far at 800+ items, but what about 100,000+?
        - documentation says ~ 50,000 is the limit
        - will need to think of a server-side option
    '''
    # get username from session
    username = session['username']

    # get form
    form = forms.solrSearch(request.form)

    # dynamically update fields

    # collection selection
    coll_query = {
        'q': "rels_hasContentModel:*Collection",
        'fl': ["id", "dc_title"],
        'rows': 1000
    }
    coll_results = solr_handle.search(**coll_query)
    coll_docs = coll_results.documents

    # a collection without dc_title would raise KeyError while the choices
    # are built below, so give it a generic title first
    for each in coll_docs:
        if 'dc_title' not in each:
            each['dc_title'] = ['Unknown Collection Title' + each['id'].encode('ascii', 'ignore')]

    form.collection_object.choices = [
        (each['id'].encode('ascii', 'ignore'), each['dc_title'][0].encode('ascii', 'ignore'))
        for each in coll_docs
    ]
    form.collection_object.choices.insert(0, ("", "All Collections"))

    # content model: one choice per facet value, labelled with the part after
    # the last ':'
    cm_query = {
        'q': '*',
        'facet': 'true',
        'facet.field': 'rels_hasContentModel'
    }
    cm_results = solr_handle.search(**cm_query)
    form.content_model.choices = [
        (each, each.split(":")[-1])
        for each in cm_results.facets['facet_fields']['rels_hasContentModel']
    ]
    form.content_model.choices.insert(0, ("", "All Content Types"))

    # anything but POST: render the search form without results
    if request.method != 'POST':
        return render_template("PIDSolr.html", username=username, form=form, coll_docs=coll_docs)

    # perform search
    # build base with native Solr queries
    query = {
        'q': form.q.data,
        'fq': [form.fq.data],
        'fl': [form.fl.data],
        'rows': 100000
    }

    # Fedora RELS-EXT
    # collection selection
    if form.collection_object.data:
        print("Collection refinement: %s" % (form.collection_object.data,))
        escaped_coll = form.collection_object.data.replace(":", "\\:")
        query['fq'].append("rels_isMemberOfCollection:info\\:fedora/" + escaped_coll)

    # content model / type selection
    if form.content_model.data:
        print("Content Model refinement: %s" % (form.content_model.data,))
        escaped_cm = form.content_model.data.replace(":", "\\:")
        query['fq'].append("rels_hasContentModel:" + escaped_cm)

    # issue query
    print(query)
    stime = time.time()
    q_results = solr_handle.search(**query)
    ttime = (time.time() - stime) * 1000
    print("Solr Query took: %s ms" % (ttime,))

    # reduce each hit to a [PID, title] row for DataTables
    data = []
    stime = time.time()
    for each in q_results.documents:
        try:
            PID = each['id'].encode('ascii', 'ignore')
            dc_title = each['dc_title'][0].encode('ascii', 'ignore')
            data.append([PID, dc_title])
        except Exception:
            # best effort: skip hits that cannot be rendered.  .get() is used
            # because a hit without 'id' would otherwise raise again in here.
            print("Could not render: %s" % (each.get('id'),))  # unicode solr id
    ttime = (time.time() - stime) * 1000
    print("Solr Munging for DataTables took:: %s ms" % (ttime,))

    json_output = json.dumps(data)

    return render_template("PIDSolr.html", username=username, form=form,
                           q_results=q_results, json_output=json_output, coll_docs=coll_docs)
def total_record_count(self):
    '''Return how many records Solr reports for a match-all query.'''
    # "*:*" matches every record; rows=0 asks for no documents back
    count_query = {"q": "*:*", "rows": 0}
    return solr_handle.search(**count_query).total_results