def findPublishedRecords(self):
    """Query the indexer and return the documents of the Solr response.

    NOTE(review): the query is pinned to one hard-coded storage_id; the
    original carried a commented-out ``published:"true"`` query, which
    suggests this is a debugging leftover -- confirm before relying on it.
    """
    request = SearchRequest("storage_id:\"c6a214670dc644e5ebdaede4a2243f67\"")
    responseBytes = ByteArrayOutputStream()
    self.indexer.search(request, responseBytes)
    payload = ByteArrayInputStream(responseBytes.toByteArray())
    return SolrResult(payload).getResults()
def handleWorkflowStep(self):
    """Write the distinct ``workflow_step_label`` values as a JSON-like array.

    Each label is emitted to ``self.writer`` as a ``{"value", "label"}``
    object; when the search finds nothing, ``[""]`` is written instead.
    The writer is closed before returning.
    """
    request = SearchRequest("workflow_step_label:[* TO *]" )
    request.setParam("fq", 'item_type:"object"')
    # NOTE(review): "fq" is set twice with setParam; if setParam replaces
    # rather than appends, only this second filter is applied -- confirm.
    request.setParam("fq", 'workflow_id:"dataset"')
    request.setParam("rows", "1000")

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    solr = SolrResult(ByteArrayInputStream(stream.toByteArray()))

    labels = HashSet()
    if not (solr.getNumFound() > 0):
        self.writer.println("[\"\"]")
    else:
        # Collect every label once; the set removes duplicates.
        for document in solr.getResults():
            values = document.getList("workflow_step_label")
            if not values.isEmpty():
                for value in values:
                    labels.add(value)

        self.writer.println("[")
        for position, label in enumerate(labels):
            separator = ""
            if position > 0:
                separator = ","
            self.writer.println(separator + "{\"value\": \"%s\",\n\"label\": \"%s\"}" % (label, label))
        self.writer.println("]")
    self.writer.close()
def checkAprovedRequests(self, provisioned=0, startPage=1):
    """
    A customised query for package type of arms at workflow_step of
    arms-approved.

    Returns the requests filtered on provisioning_checklist.4: entries whose
    value is not "null" when ``provisioned`` is truthy, entries whose value
    is "null" otherwise. ``startPage`` is accepted but not used here.
    """
    workflowStep = "arms-approved"
    eventNames = ["arms_draft","arms_redraft","arms_review","arms_approved","arms_rejected"]

    request = SearchRequest(self.packageType or "packageType:arms")
    request.addParam("fq", 'workflow_step:' + workflowStep)

    negation = ""
    if provisioned:
        negation = "-"
    request.addParam("fq", negation + 'provisioning_checklist.4:null')

    request.setParam("sort", "date_object_modified desc, f_dc_title asc")
    request.setParam("fl","id,dc_title,date-provisioned")

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    solrResults = SolrResult(ByteArrayInputStream(stream.toByteArray()))
    if not solrResults:
        return ArrayList()

    found = solrResults.getResults()
    if found:
        found = self.mergeEvents(found, eventNames)
    self._setPaging(found.size())
    return found
def handleQuery(self, query, fieldName, formatStr):
    """Run ``query`` and write the distinct ``fieldName`` values as an array.

    Values are kept only if they contain ``self.term`` (every value is kept
    when ``self.term`` is None). Each value is rendered through
    ``formatStr`` with the mapping key ``hit``. When the search finds
    nothing, ``[""]`` is written. The writer is closed before returning.
    """
    request = SearchRequest(query)
    request.setParam("fq", 'item_type:"object"')
    # NOTE(review): second setParam on "fq"; if setParam replaces the value,
    # the item_type filter above is dropped -- confirm.
    request.setParam("fq", 'workflow_id:"dataset"')
    request.setParam("rows", "1000")

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    solr = SolrResult(ByteArrayInputStream(stream.toByteArray()))

    matches = HashSet()
    if solr.getNumFound() > 0:
        for document in solr.getResults():
            values = document.getList(fieldName)
            if values.isEmpty():
                continue
            for value in values:
                if self.term is None or value.find(self.term) != -1:
                    matches.add(value)

        self.writer.print("[")
        for position, value in enumerate(matches):
            rendered = formatStr % {"hit":value}
            if position > 0:
                rendered = "," + rendered
            self.writer.print(rendered)
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def handleQuery(self, query, fieldName, formatStr):
    """Search with ``query`` and emit the unique ``fieldName`` values.

    A value is emitted when ``self.term`` is None or occurs inside the
    value. Output goes to ``self.writer`` as ``[item,item,...]`` where each
    item is ``formatStr % {"hit": value}``; an empty search writes ``[""]``.
    The writer is always closed at the end.
    """
    searchRequest = SearchRequest(query)
    searchRequest.setParam("fq", 'item_type:"object"')
    # NOTE(review): "fq" is assigned twice via setParam; presumably only the
    # last assignment survives -- verify against SearchRequest.
    searchRequest.setParam("fq", 'workflow_id:"dataset"')
    searchRequest.setParam("rows", "1000")

    rawOutput = ByteArrayOutputStream()
    self.indexer.search(searchRequest, rawOutput)
    response = SolrResult(ByteArrayInputStream(rawOutput.toByteArray()))

    unique = HashSet()
    if not (response.getNumFound() > 0):
        self.writer.println("[\"\"]")
    else:
        for entry in response.getResults():
            fieldValues = entry.getList(fieldName)
            if not fieldValues.isEmpty():
                for candidate in fieldValues:
                    if self.term is None:
                        unique.add(candidate)
                    elif candidate.find(self.term) != -1:
                        unique.add(candidate)

        self.writer.print("[")
        isFirst = True
        for candidate in unique:
            if isFirst:
                self.writer.print(formatStr % {"hit": candidate})
                isFirst = False
            else:
                self.writer.print("," + formatStr % {"hit": candidate})
        self.writer.print("]")
    self.writer.close()
def handleGrantNumber(self):
    """Write the distinct grant numbers starting with ``self.term``.

    Runs a prefix query on ``grant_numbers`` and prints the unique values to
    ``self.writer`` as a quoted, comma-separated array; an empty search
    writes ``[""]``. The writer is closed before returning.
    """
    request = SearchRequest("grant_numbers:%s*" % self.term)
    request.setParam("fq", 'item_type:"object"')
    # NOTE(review): repeated setParam on "fq"; if setParam overwrites, the
    # item_type filter is lost -- confirm.
    request.setParam("fq", 'workflow_id:"dataset"')
    request.setParam("rows", "1000")

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    solr = SolrResult(ByteArrayInputStream(stream.toByteArray()))

    grantNumbers = HashSet()
    if solr.getNumFound() > 0:
        for document in solr.getResults():
            values = document.getList("grant_numbers")
            if values.isEmpty():
                continue
            for value in values:
                grantNumbers.add(value)

        self.writer.print("[")
        for position, number in enumerate(grantNumbers):
            quoted = "\"%s\"" % number
            if position > 0:
                quoted = "," + quoted
            self.writer.print(quoted)
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def findPackagesToTransition(self, fromWorkflowId, fromWorkflowStage):
    """Return the indexed documents at the given workflow id and step.

    The query matches ``workflow_id`` directly and ``workflow_step`` through
    a nested ``_query_`` clause.
    """
    queryText = "".join([
        "workflow_id:", fromWorkflowId,
        " AND _query_:\"workflow_step:", fromWorkflowStage, "\"",
    ])
    request = SearchRequest(queryText)
    request.setParam("fq", "owner:[* TO *]")
    # NOTE(review): second setParam on "fq"; if setParam replaces the value,
    # the owner filter above is not applied -- confirm.
    request.setParam("fq", "security_filter:[* TO *]")

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    payload = ByteArrayInputStream(stream.toByteArray())
    return SolrResult(payload).getResults()
def checkRequests(self, checklist_filter=None, role_filter='reviewer', exclusive=True, startPage=1):
    """
    A customised query for arms at arms-review.

    Get a list of requests filtered by provisioning_checklist. For assessor
    roles the result is user based: each request is checked through
    Assessment.queryStatus and only one page of matching requests is
    returned.

    checklist_filter -- checklist item numbers ('1'..'4') that must be set;
                        defaults to ['1'].
    role_filter      -- 'reviewer' (default), 'assessor' (assessments that
                        are new or draft) or 'assessor-assessed' (submitted
                        assessments). Any other role starting with
                        'assessor' matches no request.
    exclusive        -- when true, checklist items that are NOT listed in
                        checklist_filter must be unset.
    startPage        -- 1-based page number used for assessor paging.

    Returns a JSONArray for assessor roles, the merged search results
    otherwise, or an empty ArrayList if there is no Solr result.
    """
    # A None default avoids sharing one mutable list between calls.
    if checklist_filter is None:
        checklist_filter = ['1']

    workflowStep = "arms-review"
    if self.packageType:
        req = SearchRequest(self.packageType)
    else:
        req = SearchRequest("packageType:arms")
    req.addParam("fq", 'workflow_step:' + workflowStep)

    for item in ['1', '2', '3', '4']:
        if item in checklist_filter:
            req.addParam("fq", '-provisioning_checklist.' + item + ':null' + ' AND provisioning_checklist.' + item + ':[* TO *]')
        elif exclusive:
            # ensure that brand new submissions (not yet saved by reviewer) are also returned
            req.addParam("fq", 'provisioning_checklist.' + item + ':null' + ' OR (*:* -provisioning_checklist.' + item + ':[* TO *])')

    req.setParam("sort", "date_object_modified desc, f_dc_title asc")
    req.setParam("fl", self.returnFields)

    out = ByteArrayOutputStream()
    self.indexer.search(req, out)
    solrResults = SolrResult(ByteArrayInputStream(out.toByteArray()))
    if not solrResults:
        return ArrayList()

    results = solrResults.getResults()
    if results:
        results = self.mergeEvents(results, ["arms_draft","arms_redraft","arms_review","arms_approved","arms_rejected"])

    if role_filter.startswith('assessor'):
        returnArray = self._pageAssessorRequests(results, role_filter, startPage)
    else:
        returnArray = results
    self._setPaging(returnArray.size())
    return returnArray


def _pageAssessorRequests(self, results, role_filter, startPage):
    """Return one page (as a JSONArray) of the requests matching the assessor role."""
    # Unknown assessor roles used to leave query_status unbound (NameError);
    # they now simply match nothing.
    statusByRole = {
        'assessor': ['new', 'draft'],
        'assessor-assessed': ['submitted'],
    }
    query_status = statusByRole.get(role_filter, [])

    returnArray = JSONArray()
    x = Assessment()
    x.activate(self.velocityContext)
    rows = self.recordsPerPage
    start = (startPage - 1) * rows
    i = 0  # counts every matching request, including those outside the page
    for r in results:
        status = x.queryStatus(r.get("id"))
        if status not in query_status:
            continue
        if i >= start and i - start < rows:
            if status == 'submitted':
                assessment_submitted_date = x.queryMyAttr('date')
                if assessment_submitted_date:
                    r.getJsonObject().put('date', assessment_submitted_date)
            returnArray.add(r)
        i = i + 1
    return returnArray
def findPackagesToPurge(self,packageType):
    """Return documents of ``packageType`` created at least seven days ago.

    Only ``storage_id``, ``date_object_created`` and
    ``date_object_modified`` are requested from the index.
    """
    queryText = "".join([
        "display_type:", packageType,
        " AND date_object_created:[* TO NOW-7DAY]",
    ])
    request = SearchRequest(queryText)
    request.setParam("fq", "owner:[* TO *]")
    # NOTE(review): "fq" is set twice with setParam; if the second call
    # replaces the first, the owner filter has no effect -- confirm.
    request.setParam("fq", "security_filter:[* TO *]")
    request.setParam("fl", "storage_id,date_object_created,date_object_modified")

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    payload = ByteArrayInputStream(stream.toByteArray())
    return SolrResult(payload).getResults()
def getAttachments(self):
    """Return up to 1000 "review-attachments" documents attached to ``self.oid``."""
    attachmentType = "review-attachments"
    queryText = "attached_to:%s AND attachment_type:%s" % (self.oid, attachmentType)
    request = SearchRequest(queryText)
    request.setParam("rows", "1000")

    stream = ByteArrayOutputStream()
    self.Services.indexer.search(request, stream)
    payload = ByteArrayInputStream(stream.toByteArray())
    return SolrResult(payload).getResults()
def findPackagesToTransition(self, fromWorkflowId, fromWorkflowStage):
    """Find the packages currently sitting at a workflow id / stage pair.

    Returns the documents of the Solr response for
    ``workflow_id:<id> AND _query_:"workflow_step:<stage>"``.
    """
    stepClause = "_query_:\"workflow_step:" + fromWorkflowStage + "\""
    idClause = "workflow_id:" + fromWorkflowId
    searchRequest = SearchRequest(idClause + " AND " + stepClause)
    searchRequest.setParam("fq", "owner:[* TO *]")
    # NOTE(review): presumably this setParam overwrites the owner filter set
    # on the previous line -- verify against SearchRequest.
    searchRequest.setParam("fq", "security_filter:[* TO *]")

    rawOutput = ByteArrayOutputStream()
    self.indexer.search(searchRequest, rawOutput)
    response = SolrResult(ByteArrayInputStream(rawOutput.toByteArray()))
    return response.getResults()
def _packageResults(self, req, solrLog=None):
    """Run ``req`` through ``searchByIndex`` and return the result documents.

    ``solrLog`` is passed as a third argument to ``searchByIndex`` when it is
    truthy; otherwise the two-argument form is called. An empty ArrayList is
    returned when the SolrResult object is falsy.
    """
    stream = ByteArrayOutputStream()
    if not solrLog:
        self.indexer.searchByIndex(req, stream)
    else:
        self.indexer.searchByIndex(req, stream, solrLog)

    solrResults = SolrResult(ByteArrayInputStream(stream.toByteArray()))
    if not solrResults:
        return ArrayList()
    return solrResults.getResults()
def findPackagesToPurge(self, packageType):
    """Find packages of ``packageType`` whose creation date is 7+ days old.

    The response is limited to the ``storage_id``, ``date_object_created``
    and ``date_object_modified`` fields.
    """
    ageClause = "date_object_created:[* TO NOW-7DAY]"
    typeClause = "display_type:" + packageType
    searchRequest = SearchRequest(typeClause + " AND " + ageClause)
    searchRequest.setParam("fq", "owner:[* TO *]")
    # NOTE(review): presumably this setParam replaces the owner filter set
    # just above -- verify against SearchRequest.
    searchRequest.setParam("fq", "security_filter:[* TO *]")
    searchRequest.setParam("fl", "storage_id,date_object_created,date_object_modified")

    rawOutput = ByteArrayOutputStream()
    self.indexer.search(searchRequest, rawOutput)
    response = SolrResult(ByteArrayInputStream(rawOutput.toByteArray()))
    return response.getResults()
class ReportsData:
    """Page data object that loads 'harvestStart' events from the "eventLog" index."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the page context, clear the session "fq" entry and run the search."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.log = context["log"]
        self.__harvestList = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; print an error and return None if it is None."""
        value = self.velocityContext[index]
        if value is not None:
            return value
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def __search(self):
        """Load up to 100 harvestStart events; does nothing for non-admin users."""
        indexer = self.services.getIndexer()

        # Security prep work
        isAdmin = self.vc("page").authentication.is_admin()
        if not isAdmin:
            print("ERROR: User is not an admin '")
            return None

        req = SearchRequest('eventType:harvestStart')
        req.setParam("rows", "100")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__harvestList = SolrResult(ByteArrayInputStream(out.toByteArray()))

    def getHarvestlist(self):
        """Return the harvest event documents, or an empty list if no search ran."""
        # __harvestList stays None for non-admins; avoid AttributeError.
        if self.__harvestList is None:
            return []
        return self.__harvestList.getResults()

    def getItemCount(self):
        """Return the number of harvest events found, or 0 if no search ran."""
        if self.__harvestList is None:
            return 0
        return self.__harvestList.getNumFound()
class ReportsData:
    """Page data object that loads 'harvestStart' events from the "eventLog" index."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the page context, clear the session "fq" entry and run the search."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.log = context["log"]
        self.__harvestList = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; print an error and return None if it is None."""
        entry = self.velocityContext[index]
        if entry is None:
            # Single parenthesised argument: prints identically on Python 2 and 3.
            print("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None
        return entry

    def __search(self):
        """Load up to 100 harvestStart events; does nothing for non-admin users."""
        indexer = self.services.getIndexer()

        # Security prep work
        if not self.vc("page").authentication.is_admin():
            print("ERROR: User is not an admin '")
            return None

        req = SearchRequest('eventType:harvestStart')
        req.setParam("rows", "100")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__harvestList = SolrResult(ByteArrayInputStream(out.toByteArray()))

    def getHarvestlist(self):
        """Return the harvest event documents, or an empty list if no search ran."""
        # The search is skipped for non-admins, leaving __harvestList as None.
        if self.__harvestList is None:
            return []
        return self.__harvestList.getResults()

    def getItemCount(self):
        """Return the number of harvest events found, or 0 if no search ran."""
        if self.__harvestList is None:
            return 0
        return self.__harvestList.getNumFound()
def __activate__(self, context):
    """Search for linkable "package-dataset" objects and write them as text.

    Reads from ``context["formData"]``:
      q     -- optional title text, tokenised by ``self.titleTokens``
      qs    -- storage id of the current object, excluded from the results
      limit -- maximum number of rows (defaults to "10")

    Writes one ``<pidProperty>::<dc_title>`` line per match to the response
    as text/plain.
    """
    formData = context["formData"]
    services = context["Services"]
    response = context["response"]

    # Prepare a query
    q = formData.get("q")
    if q is not None and q != "":
        query = "(item_type:object AND " + self.titleTokens(q.strip()) + ")"
    else:
        query = "item_type:object"

    # Can't link to yourself. Skip the clause when no id was supplied
    # instead of failing on string + None.
    oid = formData.get("qs")
    if oid is not None:
        query += " AND -storage_id:\"" + oid + "\""
    # And we're not interested in attachments
    query += " AND display_type:\"package-dataset\""

    req = SearchRequest(query)
    req.setParam("fl", "dc_title,storage_id,pidProperty")
    limit = formData.get("limit")
    if limit is None:
        # Request parameters are strings; an int default is not accepted.
        limit = "10"
    req.setParam("rows", limit)

    # Search Solr
    indexer = services.getIndexer()
    out = ByteArrayOutputStream()
    indexer.search(req, out)
    result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # Build a response
    lines = []
    for doc in result.getResults():
        lines.append("%s::%s" % (doc.getFirst("pidProperty"), doc.getFirst("dc_title")))

    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    try:
        writer.println("\n".join(lines))
    finally:
        writer.close()
def getAttachedFiles(self, oid):
    """Return a JSONArray describing up to 1000 documents attached to ``oid``.

    Each entry carries the HTML-escaped ``filename``, ``attachment_type``
    (dashes replaced by spaces, then capitalised), ``access_rights``
    (capitalised) and ``id`` of one attached document.
    """
    # Build and run the query
    request = SearchRequest("attached_to:%s" % oid)
    request.setParam("rows", "1000")
    stream = ByteArrayOutputStream()
    self.Services.getIndexer().search(request, stream)
    solr = SolrResult(ByteArrayInputStream(stream.toByteArray()))

    # Convert every document into a JSON entry
    attachments = JSONArray()
    for document in solr.getResults():
        spacedType = document.getFirst("attachment_type").replace("-", " ")
        typeLabel = self.escapeHtml(WordUtils.capitalizeFully(spacedType))
        rawRights = document.getFirst("access_rights")
        rightsLabel = self.escapeHtml(WordUtils.capitalizeFully(rawRights))

        record = JsonObject()
        record.put("filename", self.escapeHtml(document.getFirst("filename")))
        record.put("attachment_type", typeLabel)
        record.put("access_rights", rightsLabel)
        record.put("id", self.escapeHtml(document.getFirst("id")))
        attachments.add(record)
    return attachments
def __activate__(self, context):
    """Look up "package-dataset" objects that can be linked to and list them.

    Form parameters read from ``context["formData"]``:
      q     -- optional title text, tokenised by ``self.titleTokens``
      qs    -- storage id of the current object, excluded from the results
      limit -- maximum number of rows (defaults to "10")

    The response body is text/plain with one ``<pidProperty>::<dc_title>``
    line per matching document.
    """
    formData = context["formData"]
    services = context["Services"]
    response = context["response"]

    # Prepare a query
    q = formData.get("q")
    if q is not None and q != "":
        query = "(item_type:object AND " + self.titleTokens(q.strip()) + ")"
    else:
        query = "item_type:object"

    # Can't link to yourself. Without a "qs" value there is nothing to
    # exclude, so skip the clause rather than fail on string + None.
    oid = formData.get("qs")
    if oid is not None:
        query += " AND -storage_id:\"" + oid + "\""
    # And we're not interested in attachments
    query += " AND display_type:\"package-dataset\""

    req = SearchRequest(query)
    req.setParam("fl", "dc_title,storage_id,pidProperty")
    limit = formData.get("limit")
    if limit is None:
        # Keep the default a string like every other request parameter.
        limit = "10"
    req.setParam("rows", limit)

    # Search Solr
    indexer = services.getIndexer()
    out = ByteArrayOutputStream()
    indexer.search(req, out)
    result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # Build a response (named "entries" so the builtin list is not shadowed)
    entries = []
    for doc in result.getResults():
        title = doc.getFirst("dc_title")
        pid = doc.getFirst("pidProperty")
        entries.append("%s::%s" % (pid, title))

    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    try:
        writer.println("\n".join(entries))
    finally:
        writer.close()
def __activate__(self, context):
    """Write the indexed keywords that start with the form value ``q``.

    Searches up to 50 documents having a ``keywords`` field (narrowed by
    ``q`` when it is supplied) and writes the sorted, de-duplicated keywords
    to the response as text/plain, one per line. When ``q`` is missing or
    empty every keyword found is written.
    """
    formData = context["formData"]
    services = context["Services"]
    response = context["response"]

    query = "keywords:[* TO *]"
    q = formData.get("q")
    if q:
        query += " AND keywords:(%(q)s OR %(q)s*)" % { "q": q }

    req = SearchRequest(query)
    req.setParam("fl", "keywords")
    req.setParam("rows", "50")

    indexer = services.getIndexer()
    out = ByteArrayOutputStream()
    indexer.search(req, out)
    result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    keywords = TreeSet()
    for doc in result.getResults():
        for keyword in doc.getList("keywords"):
            # startswith(None) raises TypeError, so a missing q must be
            # handled explicitly; it behaves like the empty prefix.
            if not q or keyword.startswith(q):
                keywords.add(keyword)

    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    try:
        writer.println("\n".join(keywords))
    finally:
        writer.close()
def getAttachedFiles(self, oid):
    """List the files attached to ``oid`` as a JSONArray of JsonObjects.

    Every object holds HTML-escaped ``filename``, ``attachment_type``,
    ``access_rights`` and ``id`` values; the type has its dashes turned into
    spaces and, like the access rights, is passed through
    ``WordUtils.capitalizeFully``. At most 1000 documents are requested.
    """
    searchRequest = SearchRequest("attached_to:%s" % oid)
    searchRequest.setParam("rows", "1000")

    rawOutput = ByteArrayOutputStream()
    indexer = self.Services.getIndexer()
    indexer.search(searchRequest, rawOutput)
    response = SolrResult(ByteArrayInputStream(rawOutput.toByteArray()))

    files = JSONArray()
    for attached in response.getResults():
        readableType = attached.getFirst("attachment_type").replace("-", " ")
        readableType = WordUtils.capitalizeFully(readableType)
        escapedType = self.escapeHtml(readableType)
        escapedRights = self.escapeHtml(
            WordUtils.capitalizeFully(attached.getFirst("access_rights")))

        item = JsonObject()
        item.put("filename", self.escapeHtml(attached.getFirst("filename")))
        item.put("attachment_type", escapedType)
        item.put("access_rights", escapedRights)
        item.put("id", self.escapeHtml(attached.getFirst("id")))
        files.add(item)
    return files
def getFilteredAssessments(self, packageType, stageName, filterType, startPage=1):
    """
    A customised query that returns one page of the requests whose
    assessment has the status selected by ``filterType``.

    ``filterType`` must be 'assessment-draft' (status new or draft) or
    'assessment-submitted' (status submitted); any other value raises
    KeyError. Returns a JSONArray, or an empty ArrayList when the SolrResult
    object is falsy.
    """
    ## reference /redbox-rdsi-arms/src/main/config/home/lib/jython/util/Assessment.py for methods
    statusByFilter = {'assessment-draft': ['new','draft'], 'assessment-submitted':['submitted']}
    wantedStatuses = statusByFilter[filterType]

    request = SearchRequest("packageType:" + packageType)
    request.addParam("fq", 'workflow_step:' + stageName)
    request.setParam("sort", "date_object_modified desc, f_dc_title asc")
    request.setParam("fl",self.returnFields)

    stream = ByteArrayOutputStream()
    self.indexer.search(request, stream)
    solrResults = SolrResult(ByteArrayInputStream(stream.toByteArray()))
    if not solrResults:
        return ArrayList()

    candidates = solrResults.getResults()
    page = JSONArray()
    assessment = Assessment()
    assessment.activate(self.velocityContext)

    matched = 0  # matching requests seen so far, inside or outside the page
    pageSize = self.recordsPerPage
    firstIndex = (startPage - 1) * self.recordsPerPage
    for candidate in candidates:
        if assessment.queryStatus(candidate.get("id")) not in wantedStatuses:
            continue
        if matched >= firstIndex and matched - firstIndex < pageSize:
            submittedDate = assessment.queryAttr(candidate.get("id"), 'date')
            if submittedDate:
                candidate.getJsonObject().put('date', submittedDate)
            page.add(candidate)
        matched = matched + 1

    self._setPaging(page.size())
    return page
def __search(self):
    """Build and run the main search for the page and store the outcome.

    Reads the "query" form value, adds the ids of objects whose annotations
    match it, combines the result with the portal's search query, applies
    facet filters (from the session or from the request URI) and a security
    filter for non-admin users, then stores the SolrResult in
    ``self.__result`` and a Pagination object in ``self.__paging``.

    NOTE(review): the nesting below was reconstructed from whitespace-
    collapsed source; the scope of the statements following ``if fq:`` in
    the URI-navigation branch is an assumption -- confirm against the
    original file.
    """
    requireEscape = False
    recordsPerPage = self.__portal.recordsPerPage
    uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
    query = None
    pagePath = self.__portal.getName() + "/" + self.pageName
    # query is always None at this point, so the form value is always read
    # and always flagged for escaping.
    if query is None or query == "":
        query = self.formData.get("query")
        requireEscape = True
    if query is None or query == "":
        query = "*:*"

    # self.__query keeps the raw user text ("" for match-all); the local
    # query becomes the escaped, field-qualified Solr query.
    if query == "*:*":
        self.__query = ""
    else:
        self.__query = query
        if requireEscape:
            query = self.__escapeQuery(query)
        query = "%s:%s" % (self.__searchField, query)
    self.sessionState.set("query", self.__query)

    # find objects with annotations matching the query
    if query != "*:*":
        anotarQuery = self.__query
        if requireEscape:
            anotarQuery = self.__escapeQuery(anotarQuery)
        annoReq = SearchRequest(anotarQuery)
        annoReq.setParam("facet", "false")
        annoReq.setParam("rows", str(99999))
        annoReq.setParam("sort", "dateCreated asc")
        annoReq.setParam("start", str(0))
        anotarOut = ByteArrayOutputStream()
        self.services.indexer.annotateSearch(annoReq, anotarOut)
        resultForAnotar = SolrResult(
            ByteArrayInputStream(anotarOut.toByteArray()))
        resultForAnotar = resultForAnotar.getResults()
        ids = HashSet()
        for annoDoc in resultForAnotar:
            annotatesUri = annoDoc.getFirst("annotatesUri")
            ids.add(annotatesUri)
            self.log.debug("Found annotation for %s" % annotatesUri)
        # add annotation ids to query
        # (when ids is empty this appends the clause ' OR id:("")')
        query += ' OR id:("' + '" OR "'.join(ids) + '")'

    # Combine with the portal's own search query, if it has one.
    portalSearchQuery = self.__portal.searchQuery
    if portalSearchQuery == "":
        portalSearchQuery = query
    else:
        if query != "*:*":
            query += " AND " + portalSearchQuery
        else:
            query = portalSearchQuery

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", self.__portal.facetFieldList)
    req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
    req.setParam("facet.limit", str(self.__portal.facetCount))
    req.setParam("sort", self.__sortBy)

    # setup facets
    if self.__useSessionNavigation:
        # Session navigation: filters live in the session and are changed
        # through the "verb"/"value" form fields.
        action = self.formData.get("verb")
        value = self.formData.get("value")
        fq = self.sessionState.get("fq")
        if fq is not None:
            self.__pageNum = 1
            req.setParam("fq", fq)
        if action == "add_fq":
            self.__pageNum = 1
            req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "remove_fq":
            self.__pageNum = 1
            req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "clear_fq":
            self.__pageNum = 1
            req.removeParam("fq")
        elif action == "select-page":
            self.__pageNum = int(value)
    else:
        # URI navigation: page number and filters are parsed from the part
        # of the request URI that follows the page path.
        navUri = uri[len(pagePath):]
        self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
        savedfq = self.sessionState.get("savedfq")
        limits = []
        if savedfq:
            limits.extend(savedfq)
        if fq:
            limits.extend(fq)
        self.sessionState.set("savedfq", limits)
        for q in fq:
            req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

    portalQuery = self.__portal.query
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", 'item_type:"object"')
    if req.getParams("fq"):
        self.__selected = ArrayList(req.getParams("fq"))

    if self.__useSessionNavigation:
        self.sessionState.set("fq", self.__selected)
    self.sessionState.set("searchQuery", portalSearchQuery)
    self.sessionState.set("pageNum", self.__pageNum)

    # Make sure 'fq' has already been set in the session
    # The security filter is added after the session copy above was taken,
    # so it is applied to this request only.
    if not self.page.authentication.is_admin():
        current_user = self.page.authentication.get_username()
        security_roles = self.page.authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(
            security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        req.addParam("fq", security_query)

    ## uncomment to ensure guest users not logged in cannot see alerts in browse page
    # self.filterOutWorkflowStepForUnAuth(req, "inbox")

    # Page numbers are 1-based; Solr's "start" is a 0-based row offset.
    req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))
    self.log.debug(" * search.py: %s, page: %s" % (req.toString(), self.__pageNum))

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
    if self.__result is not None:
        self.__paging = Pagination(self.__pageNum,
                                   self.__result.getNumFound(),
                                   self.__portal.recordsPerPage)
class HomeData:
    """Home page data: recent, owned and workflow items plus the services UI stages."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the page context, clear the session "fq" entry and run the searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None
        self.__steps = None
        self.__selfservicesStages = None
        self.__search()

    # Get from velocity context Mint version
    def vc(self, index):
        """Return the context entry ``index``; print an error and return None if it is None."""
        value = self.velocityContext[index]
        if value is not None:
            return value
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def __runSearch(self, indexer, query, rows, filters, sort=None):
        """Search for objects matching ``query`` and return the SolrResult.

        ``filters`` are added, in order, after the item_type filter.
        """
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for fq in filters:
            req.addParam("fq", fq)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __search(self):
        """Run the four home page searches and load the services UI workflow stages."""
        indexer = self.services.getIndexer()
        portal = self.services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = auth.is_admin()

        # Filters shared by every request: portal filters first, then the
        # security filter (only for non-admin users).
        portalFilters = [fq for fq in (portalQuery, portalSearchQuery) if fq]
        securityFilters = []
        if not isAdmin:
            securityFilters.append(security_query)
        listFilters = portalFilters + securityFilters
        listSort = "last_modified desc, f_dc_title asc"

        self.__latest = self.__runSearch(
            indexer, "last_modified:[NOW-1MONTH TO *]", "10", listFilters, listSort)
        self.__mine = self.__runSearch(
            indexer, owner_query, "10", listFilters, listSort)
        self.__workflows = self.__runSearch(
            indexer, 'workflow_security:"' + current_user + '"', "10", listFilters, listSort)
        # Count-only query (rows=0); the empty filter string is kept from
        # the original request.
        self.__result = self.__runSearch(
            indexer, "*:*", "0", portalFilters + [""] + securityFilters)

        self.vc("sessionState").set("fq", 'item_type:"object"')

        # Load in the services UI workflow
        # NOTE(review): self.__steps is only ever assigned None in this
        # class, so every stage is built without facet data -- confirm.
        selfSubmitWfConfig = JsonSimple(FascinatorHome.getPathFile("harvest/workflows/servicesUI.json"))
        selfSubmitJsonStageList = selfSubmitWfConfig.getJsonSimpleList(["stages"])
        servicesStages = []
        for jsonStage in selfSubmitJsonStageList:
            servicesStages.append(WorkflowStage(jsonStage, self.__steps))
        self.__selfservicesStages = servicesStages

    def getLatest(self):
        """Return the objects modified within the last month."""
        return self.__latest.getResults()

    def getMine(self):
        """Return the objects owned by the current user."""
        return self.__mine.getResults()

    def getWorkflows(self):
        """Return the objects whose workflow_security names the current user."""
        return self.__workflows.getResults()

    def getItemCount(self):
        """Return the number of objects found by the count query."""
        return self.__result.getNumFound()

    def getServicesStages(self):
        """Return the services UI workflow stages loaded by the search."""
        # The stages are stored in __selfservicesStages; the previously
        # returned __servicesStages attribute was never assigned.
        return self.__selfservicesStages
class HomeData:
    """Home page data: recent, owned and workflow items plus an overall item count."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the page context, clear the session "fq" entry and run the searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.__latest = self.__mine = self.__workflows = self.__result = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; print an error and return None if it is None."""
        if self.velocityContext[index] is None:
            print("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None
        return self.velocityContext[index]

    def __search(self):
        """Run the three list searches and the count search, storing each SolrResult."""
        indexer = self.services.getIndexer()
        portalQuery = self.services.getPortalManager().get(self.vc("portalId")).getQuery()
        portalSearchQuery = self.services.getPortalManager().get(self.vc("portalId")).getSearchQuery()

        # Security prep work
        authentication = self.vc("page").authentication
        current_user = authentication.get_username()
        security_roles = authentication.get_roles_list()
        owner_query = 'owner:"' + current_user + '"'
        security_query = "".join([
            "(", 'security_filter:("' + '" OR "'.join(security_roles) + '")',
            ") OR (", 'security_exception:"' + current_user + '"',
            ") OR (", owner_query, ")",
        ])
        isAdmin = self.vc("page").authentication.is_admin()

        def startRequest(queryText):
            # New request restricted to objects and to the portal's filters.
            request = SearchRequest(queryText)
            request.setParam("fq", 'item_type:"object"')
            for portalFilter in (portalQuery, portalSearchQuery):
                if portalFilter:
                    request.addParam("fq", portalFilter)
            return request

        def execute(request):
            # Non-admins only see what the security filter allows.
            if not isAdmin:
                request.addParam("fq", security_query)
            stream = ByteArrayOutputStream()
            indexer.search(request, stream)
            return stream

        def topTen(queryText):
            request = startRequest(queryText)
            request.setParam("rows", "10")
            request.setParam("sort", "last_modified desc, f_dc_title asc")
            return SolrResult(ByteArrayInputStream(execute(request).toByteArray()))

        self.__latest = topTen("last_modified:[NOW-1MONTH TO *]")
        self.__mine = topTen(owner_query)
        self.__workflows = topTen('workflow_security:"' + current_user + '"')

        # Count-only query over everything visible to the user.
        countRequest = startRequest("*:*")
        countRequest.addParam("fq", "")
        countRequest.setParam("rows", "0")
        countStream = execute(countRequest)
        self.vc("sessionState").set("fq", 'item_type:"object"')
        self.__result = SolrResult(ByteArrayInputStream(countStream.toByteArray()))

    def getLatest(self):
        """Return the objects modified within the last month."""
        return self.__latest.getResults()

    def getMine(self):
        """Return the objects owned by the current user."""
        return self.__mine.getResults()

    def getWorkflows(self):
        """Return the objects whose workflow_security names the current user."""
        return self.__workflows.getResults()

    def getItemCount(self):
        """Return the number of objects found by the count query."""
        return self.__result.getNumFound()
class HomeData:
    """Home page data: workflow stages, alerts, latest live records and embargoed records."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the page context, clear the session "fq" entry and run the searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.__latest = None
        self.__steps = None
        self.__alerts = None
        self.__result = None
        self.__stages = None
        self.__embargoes = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; log an error and return None if it is None."""
        if self.velocityContext[index] is None:
            self.velocityContext["log"].error("ERROR: Requested context entry '{}' doesn't exist", index)
            return None
        return self.velocityContext[index]

    def __search(self):
        """Run the facet, listing, alert and latest searches, then the embargo search."""
        indexer = Services.getIndexer()
        portalQuery = Services.getPortalManager().get(self.vc("portalId")).getQuery()
        portalSearchQuery = Services.getPortalManager().get(self.vc("portalId")).getSearchQuery()

        # Security prep work
        authentication = self.vc("page").authentication
        current_user = authentication.get_username()
        security_roles = authentication.get_roles_list()
        security_query = "".join([
            "(", 'security_filter:("' + '" OR "'.join(security_roles) + '")',
            ") OR (", 'security_exception:"' + current_user + '"',
            ") OR (", 'owner:"' + current_user + '"', ")",
        ])
        isAdmin = self.vc("page").authentication.is_admin()

        def startRequest(queryText):
            # New request restricted to objects and to the portal's filters.
            request = SearchRequest(queryText)
            request.setParam("fq", 'item_type:"object"')
            for portalFilter in (portalQuery, portalSearchQuery):
                if portalFilter:
                    request.addParam("fq", portalFilter)
            return request

        def execute(request):
            stream = ByteArrayOutputStream()
            indexer.search(request, stream)
            return SolrResult(ByteArrayInputStream(stream.toByteArray()))

        # 1. Facet counts per workflow step (no rows returned).
        facetRequest = startRequest("*:*")
        facetRequest.addParam("fq", "")
        facetRequest.setParam("rows", "0")
        facetRequest.setParam("facet", "true")
        facetRequest.setParam("facet.field", "workflow_step")
        if not isAdmin:
            facetRequest.addParam("fq", security_query)
        self.__steps = execute(facetRequest).getFacets().get("workflow_step")

        # 2. Workflow stages from the dataset workflow configuration.
        wfConfig = JsonSimple(FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
        stages = []
        for jsonStage in wfConfig.getJsonSimpleList(["stages"]):
            stages.append(WorkflowStage(jsonStage, self.__steps))
        self.__stages = stages

        # 3. The 25 most recently modified objects.
        listRequest = startRequest("*:*")
        listRequest.addParam("fq", "")
        listRequest.setParam("rows", "25")
        listRequest.setParam("sort", "last_modified desc, f_dc_title asc")
        if not isAdmin:
            listRequest.addParam("fq", security_query)
        self.__result = execute(listRequest)

        # 4. Same request narrowed to the first workflow stage.
        listRequest.addParam("fq", "workflow_step:%s" % stages[0].getName())
        self.__alerts = execute(listRequest)

        # 5. Live records modified within the last month.
        latestRequest = startRequest("last_modified:[NOW-1MONTH TO *] AND workflow_step:live")
        latestRequest.setParam("rows", "10")
        latestRequest.setParam("sort", "last_modified desc, f_dc_title asc")
        if not isAdmin:
            latestRequest.addParam("fq", security_query)
        self.__latest = execute(latestRequest)

        self._searchEmbargoes()
        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        """Return the live records modified within the last month."""
        return self.__latest.getResults()

    def getAlerts(self):
        """Return the records sitting at the first workflow stage."""
        return self.__alerts.getResults()

    def getItemCount(self):
        """Return the number of objects found by the listing query."""
        return self.__result.getNumFound()

    def getStages(self):
        """Return the workflow stages built from the dataset workflow."""
        return self.__stages

    def getEmbargoes(self):
        """Return the embargoed records found by _searchEmbargoes."""
        return self.__embargoes.getResults()

    def _searchEmbargoes(self):
        """Load up to 25 embargoed records at final-review, ordered by embargo date."""
        request = SearchRequest("item_type:object")
        # Escaped backslashes produce the same text the original literal
        # did: a backslash in front of each colon inside the field name.
        request.setParam("fq", 'redbox\\:embargo.redbox\\:isEmbargoed:on')
        request.addParam("fq", 'workflow_step:final-review')
        request.addParam("fq", "")
        request.setParam("fl","id,date_embargoed,dc_title")
        request.setParam("rows", "25")
        request.setParam("sort", "date_embargoed asc, dc_title asc")

        stream = ByteArrayOutputStream()
        Services.getIndexer().search(request, stream)
        self.__embargoes = SolrResult(ByteArrayInputStream(stream.toByteArray()))
        self.velocityContext["log"].info("searchEmbargoes call ended" + str(self.__embargoes))

    def formatDate(self, date):
        """Convert a ``yyyy-MM-dd'T'HH:mm:ss`` string to ``dd/MM/yyyy``."""
        sourceFormat = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
        targetFormat = SimpleDateFormat("dd/MM/yyyy")
        parsed = sourceFormat.parse(date)
        return targetFormat.format(parsed)
class ReportResultData:
    """Page controller that runs a saved report and optionally streams it as CSV.

    The report query is run against the indexer, the hits are post-filtered
    in Python against the report criteria (see __checkResults), and when the
    request asks for ``format=csv`` the matching rows are written straight to
    the HTTP response.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the scripting context, check access and run the report.

        context -- mapping supplying "page", "request", "response", "log",
                   "Services", "metadata" and "systemConfig".
        """
        self.__reportResult = None
        self.auth = context["page"].authentication
        self.request = context["request"]
        self.response = context["response"]
        self.log = context["log"]
        self.reportManager = context["Services"].getService("reportManager")
        self.indexer = context['Services'].getIndexer()
        self.metadata = context["metadata"]
        self.systemConfig = context["systemConfig"]
        self.__rowsFound = 0
        self.__rowsFoundSolr = 0
        self.processed_results_list = []
        self.errorMsg = ""
        # Only populated by the CSV export; initialised here so that
        # findDisplayLabel() is safe to call on any code path.
        self.fields = None

        if self.auth.is_logged_in():
            if self.auth.is_admin():
                self.buildDashboard(context)
            else:
                self.errorMsg = "Requires Admin / Librarian / Reviewer access."
        else:
            self.errorMsg = "Please login."

        # NOTE(review): the search (including the CSV download) runs even when
        # the access checks above only set errorMsg - confirm this is intended.
        self.__reportSearch()

    def __recordFailure(self, userMessage, logMessage):
        """Publish the exception currently being handled.

        Must be called from inside an ``except`` block. userMessage takes two
        %s slots (exception type, value); logMessage takes three (report
        label, exception type, value).
        """
        excType, excValue = sys.exc_info()[:2]
        self.errorMsg = userMessage % (excType, excValue)
        self.log.error(logMessage % (self.report.getLabel(), excType, excValue))

    def __newReportRequest(self):
        """Build the filtered request shared by the master and CSV searches."""
        req = SearchRequest(self.reportQuery)
        req.setParam("fq", 'item_type:"object"')
        # addParam, not setParam: a second setParam("fq", ...) would replace
        # the item_type filter instead of adding to it.
        req.addParam("fq", 'workflow_id:"dataset"')
        req.setParam("rows", self.__rowsFoundSolr)
        return req

    def __reportSearch(self):
        """Run the report query, post-filter the hits and export CSV on request.

        On any query failure errorMsg is set, the error is logged and the
        method returns early so the display template can show the message.
        """
        self.reportId = self.request.getParameter("id")
        self.format = self.request.getParameter("format")
        self.report = self.reportManager.getReports().get(self.reportId)
        self.reportQuery = self.report.getQueryAsString()
        self.log.debug("Report query: " + self.reportQuery)

        # Get a total number of records so the master search can ask for
        # all of them in a single page.
        # Bare excepts are deliberate throughout this class: the indexer
        # raises Java exceptions, which "except Exception" would not catch
        # under Jython.
        try:
            out = ByteArrayOutputStream()
            recnumreq = SearchRequest(self.reportQuery)
            recnumreq.setParam("rows", "0")
            self.indexer.search(recnumreq, out)
            recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
            self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
        except:
            self.__recordFailure(
                "Query failure. The issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s): %s - %s")
            return

        # Now do the master search
        req = self.__newReportRequest()
        try:
            out = ByteArrayOutputStream()
            self.indexer.search(req, out)
            self.__reportResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
            self.__checkResults()
        except:
            self.__recordFailure(
                "Query failure. The issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s): %s - %s")
            return

        # At this point the display template has enough to go with.
        # We just need to handle the CSV now.
        if self.format == "csv":
            self.__exportCsv()

    def __exportCsv(self):
        """Re-query with only the configured CSV fields and stream the result.

        The index is asked for CSV directly; rows whose ID did not survive
        __checkResults are dropped while copying to the response.
        """
        req = self.__newReportRequest()
        req.setParam("csv.mv.separator", ";")

        # Restricting the query itself to the matching IDs (fq=id:(a OR b ...))
        # was tried and abandoned because it fails for long ID lists, so the
        # IDs are only used to filter the returned rows.
        matchingIds = set([item.get("id") for item in self.getProcessedResultsList()])

        # Setup SOLR query with the required fields.
        # We must have an ID field and it must be the first field.
        self.fields = self.systemConfig.getArray("redbox-reports", "csv-output-fields")
        fieldNames = ["id"]
        if self.fields is not None:
            for field in self.fields:
                fieldNames.append(field.get("field-name"))
        req.setParam("fl", ",".join(fieldNames))

        out = ByteArrayOutputStream()
        try:
            self.indexer.search(req, out, self.format)
        except:
            # We can't get the result back from SOLR so fall back to the template display
            self.__recordFailure(
                "Query failure. Failed to load the data - this issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s); Error: %s - %s")
            return

        try:
            csvResponseString = String(out.toByteArray(), "utf-8")
            csvResponseLines = csvResponseString.split("\n")
        except:
            # We can't decode the response so fall back to the template display
            self.__recordFailure(
                "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s); Error: %s - %s")
            return

        fileName = self.urlEncode(self.report.getLabel())
        self.log.debug("Generating CSV report with file name: " + fileName)
        self.response.setHeader("Content-Disposition", "attachment; filename=%s.csv" % fileName)

        sw = StringWriter()
        parser = CSVParser()
        writer = CSVWriter(sw)
        headerDone = False
        prevLine = ""
        badRowFlag = False
        for line in csvResponseLines:
            if badRowFlag:
                # The previous physical line did not parse on its own; try it
                # joined with this one (a newline inside the index data splits
                # one record over two lines).
                candidate = prevLine + line
                badRowFlag = False
                prevLine = ""
                try:
                    self.log.debug("Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s" % candidate)
                    csvLine = parser.parseLine(candidate)
                    self.log.debug("Reporting - remedy appears to have worked. Line appears as: %s" % candidate)
                except:
                    # We tried to rescue the row but failed on the second run
                    # so give up: emit an error row and move on. The state was
                    # reset above so the next line starts clean, and we must
                    # not fall through and re-write the previous record.
                    writer.writeNext(["Failed to transfer record to CSV - check logs"])
                    self.log.error("Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], candidate))
                    continue
            else:
                try:
                    csvLine = parser.parseLine(line)
                except:
                    # This can happen if there's a newline in the index data
                    # so we raise the badRowFlag and see if we can join this
                    # row to the next one to fix it
                    self.log.debug("Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], line))
                    badRowFlag = True
                    prevLine = line
                    continue

            if not headerDone:
                # Header row: swap index field names for their display labels
                headerDone = True
                for idx, csvValue in enumerate(csvLine):
                    csvLine[idx] = self.findDisplayLabel(csvValue)
            elif csvLine[0] not in matchingIds:
                # Row was rejected by __checkResults
                continue
            writer.writeNext(csvLine)

        # Now send off the CSV
        self.out = self.response.getOutputStream("text/csv")
        self.out.print(sw.toString())
        self.out.close()

    def __checkResults(self):
        """Post-filter the index hits against the report criteria.

        This is a fix, required because the SOLR index doesn't support all of
        the required reporting criteria - specifically exact/contains.
        Criteria are evaluated left to right, combining each with the result
        of the previous one through the criterion's AND/OR operator.
        Fills processed_results_list and the row count.
        """
        self.processed_results_list = []
        if self.__reportResult is None:
            return

        criteria = self.report.getCriteria()
        opAnd = SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_AND
        opOr = SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_OR

        for item in self.getReportResult():
            # lastCheck carries the previous criterion's outcome
            lastCheck = True
            dropResultFlag = False
            for criteria_item in criteria.getCriteria():
                # If the last criterion failed and this one is ANDed, drop the record
                if not lastCheck and criteria_item.getOperator() == opAnd:
                    dropResultFlag = True
                    break

                # sanitise solr field (strip query escaping backslashes)
                criteria_item.setSolr_field(String(criteria_item.getSolr_field()).replace("\\", ""))

                thisCheck = False
                if self.__checkResultsNull(criteria_item, item):
                    if self.__checkResultsMatch(criteria_item, item):
                        thisCheck = True

                if (lastCheck or thisCheck) and criteria_item.getOperator() == opOr:
                    dropResultFlag = False
                elif (lastCheck and thisCheck) and criteria_item.getOperator() == opAnd:
                    dropResultFlag = False
                else:
                    # This row doesn't match
                    dropResultFlag = True
                    break
                lastCheck = thisCheck

            if not dropResultFlag:
                self.processed_results_list.append(item)

        self.__rowsFound = len(self.processed_results_list)

    def __checkResultsNull(self, criteria_item, item):
        """Null gate applied before the value match.

        When the criterion is "field_include_null" the result is True only
        for a missing field; otherwise it is True only for a present field.
        NOTE(review): the include-null branch rejects non-null values, which
        looks surprising - confirm against the report criteria semantics.
        """
        fieldIsNull = item.get(criteria_item.getSolr_field()) is None
        if criteria_item.getAllowNulls() == "field_include_null":
            return fieldIsNull
        return not fieldIsNull

    def __checkResultsMatch(self, criteria_item, item):
        """Return True when any value of the criterion's field matches.

        "field_match" is a trimmed, case-insensitive equality test; any other
        matching operator is treated as a case-insensitive substring test.
        """
        fieldName = criteria_item.getSolr_field()
        if criteria_item.getAllowNulls() == "field_include_null":
            # If the query criteria allows nulls and the field is null, true
            if item.get(fieldName) is None:
                return True

        # Some fields are lists so just handle everything as a list
        solrvallist = ArrayList()
        solrval = item.getString(None, fieldName)
        if solrval is None:
            solrvallist = item.getList(fieldName)
        else:
            solrvallist.add(solrval)

        for solrval in solrvallist:
            if criteria_item.getMatchingOperator() == "field_match":
                if String(String(solrval).trim()).equalsIgnoreCase(String(criteria_item.getValue()).trim()):
                    return True
            else:
                # This is a contains search
                if solrval.lower().find(criteria_item.getValue().lower()) != -1:
                    return True
        return False

    def getProcessedResultsList(self):
        """Return the hits that passed __checkResults."""
        return self.processed_results_list

    def findDisplayLabel(self, csvValue):
        """Map a CSV field name to its configured label, or return it unchanged."""
        if self.fields is not None:
            for field in self.fields:
                if field.get("field-name") == csvValue:
                    return field.get("label")
        return csvValue

    def getErrorMsg(self):
        """Return the last error message ("" when there is none)."""
        return self.errorMsg

    def buildDashboard(self, context):
        """Keep a reference to the scripting context."""
        self.velocityContext = context

    def getReportResult(self):
        """Return the raw (unfiltered) hits of the master search."""
        return self.__reportResult.getResults()

    def getReportName(self):
        return self.report.getReportName()

    def getReportLabel(self):
        return self.report.getLabel()

    def urlEncode(self, text):
        """URL-encode text as UTF-8."""
        return URLEncoder.encode(text, "utf-8")

    def escapeHtml(self, value):
        """HTML-escape value; empty or missing values become ""."""
        if value:
            return StringEscapeUtils.escapeHtml(value) or ""
        return ""

    def getRowsFound(self):
        """Return the number of hits that passed __checkResults."""
        return self.__rowsFound
def findPublishedRecords(self):
    """Query the indexer for records whose "published" field is "true".

    Returns whatever SolrResult.getResults() yields for that query.
    """
    buffer = ByteArrayOutputStream()
    self.indexer.search(SearchRequest("published:\"true\""), buffer)
    response = ByteArrayInputStream(buffer.toByteArray())
    return SolrResult(response).getResults()
class OaiData:
    """Page controller behind the OAI-PMH endpoint.

    Validates the incoming request through OaiPmhVerb, runs the index search
    for the record-returning verbs and manages resumption tokens.
    """

    def __init__(self):
        self.tokensDB = None

    def __activate__(self, context):
        """Bind the scripting context, parse the request and search if needed."""
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]
        self.__result = None

        # Check if the OAI request has an overriding portal ('set') in the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken is not None:
            # This could still be null
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument", "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        # data (like a search result) do so now. Everything else can be
        # handled in the templates.
        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            # Find the metadata prefix requested, falling back to the one
            # remembered by the resumption token when there is a token.
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            if self.__metadataPrefix is None and self.__currentToken is not None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            # Only list records if the metadata format is enabled in this view
            # (isInView rejects a missing prefix).
            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return a context entry, logging an error when it is missing."""
        value = self.velocityContext[index]
        if value is not None:
            return value
        self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def isInView(self, format, view = None):
        """Tell whether a metadata format is enabled for a view.

        format -- metadata prefix; None or "" is never in view.
        view   -- portal name; defaults to the portal of the current request.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True

        # Otherwise it must be listed for this view
        if view in formatConfig.getStringList(["enabledViews"]):
            return True
        return False

    def getID(self, item):
        """Return the item's OAI identifier, defaulting to one built from its id."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator.usq.edu.au:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        # NOTE(review): any non-empty "oai_deleted" value counts as deleted,
        # including a literal "false" string - confirm what the index stores.
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Return the item's OAI set, defaulting to the current portal name."""
        oaiSet = item.getFirst("oai_set")
        # Fallback to the portal name
        if oaiSet is None or oaiSet == "":
            return self.__portalName
        # Use the required set
        return oaiSet

    def getVerb(self):
        return self.getRequest().getVerb()

    def getError(self):
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp with a "Z" suffix."""
        # gmtime() is required: strftime() alone formats *local* time, which
        # would be wrong under the trailing "Z" (UTC) designator.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        return self.__request

    def getResult(self):
        return self.__result

    def getElement(self, elementName, values):
        """Render each value as <elementName>value</elementName>, concatenated.

        Values are inserted as given (no XML escaping is applied here).
        Returns "" when values is empty or None.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, q):
        """Backslash-escape Lucene query syntax characters in q."""
        special = "+-&|!(){}[]^\"~*?:\\"
        return "".join([(c in special) and ("\\" + c) or c for c in q])

    def __search(self):
        """Run the index search for the current verb and maintain the token."""
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""
        query = "*:*"
        if hasIdentifier:
            defaultPrefix = "oai:fascinator.usq.edu.au:"
            if identifier.startswith(defaultPrefix):
                # A default TF2 OID
                idString = identifier.replace(defaultPrefix, "")
                query = "id:" + self.__escapeQuery(idString)
            else:
                # Or a custom OAI ID
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Date range, when supplied; an open end is expressed as "*"
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        if fromDate is not None or untilDate is not None:
            fromStr = "*"
            untilStr = "*"
            if fromDate is not None:
                fromStr = fromDate.isoformat() + "Z"
                self.log.debug("From Date: '{}'", fromStr)
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilStr)
            queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)

        # Continue from an existing resumption token...
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # ...or start a new one
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix, self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist", "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch", "No records match this request")
        elif self.__currentToken is None:
            # We need to store the total for NEW tokens, assuming there are
            # enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        else:
            # Check if we need to remove the resumption token
            if self.__result.getResults().size() < recordsPerPage:
                self.tokensDB.removeToken(self.__currentToken)

        # Store/update the resumption token
        if newToken is not None:
            if self.__currentToken is None:
                # Brand new token
                self.tokensDB.storeToken(newToken)
            else:
                # Or update an old token
                self.tokensDB.updateToken(newToken)

        self.__currentToken = newToken

    def getToken(self):
        """Return the current resumption token if the format is in view."""
        if self.isInView(self.__metadataPrefix):
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats, loading them on first use."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return the named payload of a stored object, or None on storage errors."""
        # First get the Object from storage
        try:
            storedObject = self.services.getStorage().getObject(oid)
        except StorageException:
            return None

        # Check whether the payload exists
        try:
            return storedObject.getPayload(metadataFileName)
        except StorageException:
            return None
def __search(self):
    """Build and run the main search for the page and set up paging.

    The user's query is escaped and scoped to the search field, extended with
    the ids of objects whose annotations match, combined with the portal's
    search query, then filtered by facets (session based or URI based), the
    portal query and - for non admins - the security filter.
    """
    recordsPerPage = self.__portal.recordsPerPage

    # Decode with an explicit charset, like every other decode in this method
    uri = URLDecoder.decode(self.request.getAttribute("RequestURI"), "UTF-8")
    pagePath = self.__portal.getName() + "/" + self.pageName

    # The query always comes from the form, so it always needs escaping
    query = self.formData.get("query")
    if query is None or query == "":
        query = "*:*"

    if query == "*:*":
        self.__query = ""
    else:
        self.__query = query
        query = "%s:%s" % (self.__searchField, self.__escapeQuery(query))
    self.sessionState.set("query", self.__query)

    # find objects with annotations matching the query
    if query != "*:*":
        anotarQuery = self.__escapeQuery(self.__query)
        annoReq = SearchRequest(anotarQuery)
        annoReq.setParam("facet", "false")
        annoReq.setParam("rows", str(99999))
        annoReq.setParam("sort", "dateCreated asc")
        annoReq.setParam("start", str(0))
        anotarOut = ByteArrayOutputStream()
        self.services.indexer.annotateSearch(annoReq, anotarOut)
        resultForAnotar = SolrResult(ByteArrayInputStream(anotarOut.toByteArray()))
        ids = HashSet()
        for annoDoc in resultForAnotar.getResults():
            annotatesUri = annoDoc.getFirst("annotatesUri")
            ids.add(annotatesUri)
            self.log.debug("Found annotation for %s" % annotatesUri)
        # add annotation ids to query; with no annotations the clause would
        # only be an empty id:("") term, so it is left out
        if not ids.isEmpty():
            query += ' OR id:("' + '" OR "'.join(ids) + '")'

    portalSearchQuery = self.__portal.searchQuery
    if not portalSearchQuery:
        portalSearchQuery = query
    elif query != "*:*":
        # Group both sides so the portal restriction applies to the whole
        # user query, not just to its last OR clause
        query = "(%s) AND (%s)" % (query, portalSearchQuery)
    else:
        query = portalSearchQuery

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", self.__portal.facetFieldList)
    req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
    req.setParam("facet.limit", str(self.__portal.facetCount))
    req.setParam("sort", self.__sortBy)

    # setup facets
    if self.__useSessionNavigation:
        action = self.formData.get("verb")
        value = self.formData.get("value")
        fq = self.sessionState.get("fq")
        if fq is not None:
            self.__pageNum = 1
            req.setParam("fq", fq)
        if action == "add_fq":
            self.__pageNum = 1
            req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "remove_fq":
            self.__pageNum = 1
            req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "clear_fq":
            self.__pageNum = 1
            req.removeParam("fq")
        elif action == "select-page":
            self.__pageNum = int(value)
    else:
        navUri = uri[len(pagePath):]
        self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
        savedfq = self.sessionState.get("savedfq")
        limits = []
        if savedfq:
            limits.extend(savedfq)
        if fq:
            limits.extend(fq)
        self.sessionState.set("savedfq", limits)
        # fq may be empty/None here (the extend above is guarded the same way)
        for q in fq or []:
            req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

    portalQuery = self.__portal.query
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", 'item_type:"object"')
    if req.getParams("fq"):
        self.__selected = ArrayList(req.getParams("fq"))

    if self.__useSessionNavigation:
        self.sessionState.set("fq", self.__selected)
        self.sessionState.set("searchQuery", portalSearchQuery)
        self.sessionState.set("pageNum", self.__pageNum)

    # The session 'fq' must be stored before the security filter is added
    # so that the filter itself is never kept in the session.
    if not self.page.authentication.is_admin():
        current_user = self.page.authentication.get_username()
        security_roles = self.page.authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        req.addParam("fq", security_query)

    req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
    self.__paging = Pagination(self.__pageNum,
                               self.__result.getNumFound(),
                               self.__portal.recordsPerPage)
class HomeData:
    """Page controller for the home page.

    Runs the searches behind the workflow stage summary, the item count,
    the alerts list and the list of recently modified live records.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the scripting context, reset the session facets and search."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")

        self.__latest = None
        self.__steps = None
        self.__alerts = None
        self.__result = None
        self.__stages = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return a context entry, logging an error when it is missing."""
        value = self.velocityContext[index]
        if value is not None:
            return value
        self.velocityContext["log"].error(
            "ERROR: Requested context entry '{}' doesn't exist", index)
        return None

    def __securityQuery(self, authentication):
        """Build the filter limiting hits to what the current user may see."""
        current_user = authentication.get_username()
        security_roles = authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        return "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"

    def __newRequest(self, query, filters, rows, sort=None):
        """Create a request for item_type "object" with extra filters applied.

        filters are added as further "fq" values in the order given.
        """
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for extraFilter in filters:
            req.addParam("fq", extraFilter)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        return req

    def __execute(self, indexer, req):
        """Run req against the indexer and wrap the response in a SolrResult."""
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __search(self):
        """Populate the stage summary, item count, alerts and latest records."""
        indexer = Services.getIndexer()
        portal = Services.getPortalManager().get(self.vc("portalId"))

        # Filters derived from the portal configuration
        portalFilters = []
        portalQuery = portal.getQuery()
        if portalQuery:
            portalFilters.append(portalQuery)
        portalSearchQuery = portal.getSearchQuery()
        if portalSearchQuery:
            portalFilters.append(portalSearchQuery)

        # Security filter, only needed for non admins
        securityFilters = []
        authentication = self.vc("page").authentication
        if not authentication.is_admin():
            securityFilters.append(self.__securityQuery(authentication))

        # The first two searches have always carried an extra empty filter;
        # it is kept so the generated requests stay unchanged.
        # NOTE(review): the empty "fq" looks like a no-op - confirm and drop.
        browseFilters = portalFilters + [""] + securityFilters
        byModification = "last_modified desc, f_dc_title asc"

        # Facet counts per workflow step (no rows needed)
        req = self.__newRequest("*:*", browseFilters, "0")
        req.setParam("facet", "true")
        req.setParam("facet.field", "workflow_step")
        steps = self.__execute(indexer, req)
        self.__steps = steps.getFacets().get("workflow_step")

        wfConfig = JsonSimple(
            FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
        jsonStageList = wfConfig.getJsonSimpleList(["stages"])
        stages = []
        for jsonStage in jsonStageList:
            stages.append(WorkflowStage(jsonStage, self.__steps))
        self.__stages = stages

        # Everything visible to the user, then the same request narrowed
        # to the first workflow stage for the alerts
        req = self.__newRequest("*:*", browseFilters, "25", byModification)
        self.__result = self.__execute(indexer, req)

        req.addParam("fq", "workflow_step:%s" % stages[0].getName())
        self.__alerts = self.__execute(indexer, req)

        # Live records modified during the last month
        req = self.__newRequest(
            "last_modified:[NOW-1MONTH TO *] AND workflow_step:live",
            portalFilters + securityFilters, "10", byModification)
        self.__latest = self.__execute(indexer, req)

        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        """Return the recently modified live records."""
        return self.__latest.getResults()

    def getAlerts(self):
        """Return the records sitting in the first workflow stage."""
        return self.__alerts.getResults()

    def getItemCount(self):
        """Return the number of records visible to the user."""
        return self.__result.getNumFound()

    def getStages(self):
        """Return the WorkflowStage list built from the workflow config."""
        return self.__stages
class ReportResultData:
    """Runs a saved report against the Solr index.

    The Solr results are post-filtered against the report criteria (see
    __checkResults) and, when the request asks for ``format=csv``, the
    surviving records are streamed back to the client as a CSV attachment.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Collect the services needed from ``context`` and run the report."""
        self.__reportResult = None
        self.auth = context["page"].authentication
        self.request = context["request"]
        self.response = context["response"]
        self.log = context["log"]
        self.reportManager = context["Services"].getService("reportManager")
        self.indexer = context['Services'].getIndexer()
        self.metadata = context["metadata"]
        self.systemConfig = context["systemConfig"]
        self.__rowsFound = 0
        self.__rowsFoundSolr = 0
        self.processed_results_list = []
        # Only populated on the CSV path; initialised here so that
        # findDisplayLabel is always safe to call.
        self.fields = None
        self.errorMsg = ""

        if (self.auth.is_logged_in()):
            if (self.auth.is_admin() == True):
                self.buildDashboard(context)
            else:
                self.errorMsg = "Requires Admin / Librarian / Reviewer access."
        else:
            self.errorMsg = "Please login."
        # NOTE(review): the report is executed (and a CSV may be sent) even
        # when the access check above set an error message - confirm that
        # this is intended. Also note only is_admin() is checked although the
        # message mentions Librarian / Reviewer.
        self.__reportSearch()

    def __runSearch(self, req):
        """Execute ``req`` on the indexer and wrap the response in a SolrResult."""
        out = ByteArrayOutputStream()
        self.indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __newReportRequest(self):
        """Build the report query limited to dataset objects, returning all rows."""
        req = SearchRequest(self.reportQuery)
        req.setParam("fq", 'item_type:"object"')
        # addParam (not a second setParam) so that both filters are applied;
        # this is the same set-then-add pattern HomeData uses for "fq".
        req.addParam("fq", 'workflow_id:"dataset"')
        req.setParam("rows", self.__rowsFoundSolr)
        return req

    def __recordFailure(self, userTemplate, logTemplate):
        """Set errorMsg and log the exception currently being handled.

        Must be called from inside an ``except`` block. ``userTemplate`` takes
        (exception type, exception value); ``logTemplate`` takes
        (report label, exception type, exception value).
        """
        excType = sys.exc_info()[0]
        excValue = sys.exc_info()[1]
        self.errorMsg = userTemplate % (excType, excValue)
        self.log.error(
            logTemplate % (self.report.getLabel(), excType, excValue))

    def __reportSearch(self):
        """Run the report query, post-filter it and optionally emit CSV."""
        self.reportId = self.request.getParameter("id")
        self.format = self.request.getParameter("format")
        self.report = self.reportManager.getReports().get(self.reportId)
        self.reportQuery = self.report.getQueryAsString()
        self.log.debug("Report query: " + self.reportQuery)

        #Get a total number of records so the main query can ask for all rows
        try:
            recnumreq = SearchRequest(self.reportQuery)
            recnumreq.setParam("rows", "0")
            recnumres = self.__runSearch(recnumreq)
            self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
        except:
            self.__recordFailure(
                "Query failure. The issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s): %s - %s")
            return

        #Setup the main query
        req = self.__newReportRequest()
        try:
            #Now do the master search
            self.__reportResult = self.__runSearch(req)
            self.__checkResults()
        except:
            self.__recordFailure(
                "Query failure. The issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s): %s - %s")
            return

        #At this point the display template has enough to go with.
        #We just need to handle the CSV now
        if (self.format == "csv"):
            self.__sendCsv()

    def __sendCsv(self):
        """Re-run the report asking Solr for CSV and send it to the client.

        The query is repeated so that only the configured output fields are
        returned; rows whose ID did not pass __checkResults are dropped.
        """
        req = self.__newReportRequest()
        req.setParam("csv.mv.separator", ";")

        #Restricting the query with an fq of 'id:(a OR b OR ...)' was tried
        #and abandoned because it doesn't work for long queries - don't try
        #it again. The IDs are matched here while preparing the CSV instead.
        idQryList = set(
            item.get("id") for item in self.getProcessedResultsList())

        #Setup SOLR query with the required fields
        self.fields = self.systemConfig.getArray("redbox-reports",
                                                 "csv-output-fields")
        #We must have an ID field and it must be the first field
        fieldNames = ["id"]
        if self.fields is not None:
            for field in self.fields:
                fieldNames.append(field.get("field-name"))
        req.setParam("fl", ",".join(fieldNames))

        out = ByteArrayOutputStream()
        try:
            self.indexer.search(req, out, self.format)
        except:
            #We can't get the result back from SOLR so fail back to the template display
            self.__recordFailure(
                "Query failure. Failed to load the data - this issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s); Error: %s - %s")
            return

        try:
            csvResponseString = String(out.toByteArray(), "utf-8")
            csvResponseLines = csvResponseString.split("\n")
        except:
            #We can't get the result back from SOLR so fail back to the template display
            self.__recordFailure(
                "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s).",
                "Reporting threw an exception (report was %s); Error: %s - %s")
            return

        fileName = self.urlEncode(self.report.getLabel())
        self.log.debug("Generating CSV report with file name: " + fileName)
        self.response.setHeader("Content-Disposition",
                                "attachment; filename=%s.csv" % fileName)

        sw = StringWriter()
        self.__writeCsvRows(csvResponseLines, CSVParser(), CSVWriter(sw),
                            idQryList)

        #Now send off the CSV
        self.out = self.response.getOutputStream("text/csv")
        self.out.print(sw.toString())
        self.out.close()

    def __writeCsvRows(self, csvResponseLines, parser, writer, idQryList):
        """Parse each response line and write the rows that should be kept.

        The first parsed row is treated as the header and its field names are
        replaced with display labels. Later rows are written only when their
        first column is in ``idQryList``. A line that fails to parse is joined
        with the following line and retried once; if that also fails an error
        row is written and processing carries on with the next line.
        """
        headerWritten = False
        prevLine = ""
        badRowFlag = False
        for line in csvResponseLines:
            if badRowFlag:
                #Second (and last) attempt for the faulty row: join it with
                #this line. Either way the rescue state ends here, so a
                #failure can't poison every following line.
                joinedLine = prevLine + line
                badRowFlag = False
                prevLine = ""
                try:
                    self.log.debug(
                        "Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s"
                        % joinedLine)
                    csvLine = parser.parseLine(joinedLine)
                    self.log.debug(
                        "Reporting - remedy appears to have worked. Line appears as: %s"
                        % joinedLine)
                except:
                    #We tried to rescue the file but failed on the second run so give up
                    writer.writeNext(
                        ["Failed to transfer record to CSV - check logs"])
                    self.log.error(
                        "Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s"
                        % (self.report.getLabel(), sys.exc_info()[0],
                           sys.exc_info()[1], joinedLine))
                    #Nothing was parsed, so there is no row to write
                    continue
            else:
                try:
                    csvLine = parser.parseLine(line)
                except:
                    #This can happen if there's a newline in the index data
                    #so we raise the badRowFlag and see if we can join this
                    #row to the next one to fix it
                    self.log.debug(
                        "Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s"
                        % (self.report.getLabel(), sys.exc_info()[0],
                           sys.exc_info()[1], line))
                    badRowFlag = True
                    prevLine = line
                    continue

            if not headerWritten:
                #Header row
                headerWritten = True
                for idx, csvValue in enumerate(csvLine):
                    csvLine[idx] = self.findDisplayLabel(csvValue)
            elif csvLine[0] not in idQryList:
                #ignore records that didn't pass __checkResults
                continue
            writer.writeNext(csvLine)

    def __checkResults(self):
        """Fill processed_results_list with the results matching the criteria.

        This is a fix, required because our SOLR index doesn't support all of
        the required reporting criteria - specifically exact/contains.
        Criteria are evaluated left to right, combining each one with the
        outcome of the previous one through the criterion's logical operator.
        """
        self.processed_results_list = []
        if self.__reportResult is None:
            return

        #Get the report criteria
        criteria = self.report.getCriteria()

        #For each result item we need to check that it matches the criteria
        for item in self.getReportResult():
            #Use last check to assist in the left-to-right check of operators
            lastCheck = True
            dropResultFlag = False

            for criteria_item in criteria.getCriteria():
                operator = criteria_item.getOperator()
                #If the last criteria item didn't check out and the AND op
                #is used, the record doesn't make it
                if not lastCheck and operator == SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_AND:
                    dropResultFlag = True
                    break

                # sanitise solr field (strip backslashes)
                criteria_item.setSolr_field(
                    String(criteria_item.getSolr_field()).replace("\\", ""))

                thisCheck = False
                if self.__checkResultsNull(criteria_item, item):
                    if self.__checkResultsMatch(criteria_item, item):
                        thisCheck = True

                #OR needs either this or the previous check to pass,
                #AND needs both; anything else drops the record
                if (lastCheck or thisCheck) and operator == SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_OR:
                    dropResultFlag = False
                elif (lastCheck and thisCheck) and operator == SearchCriteriaListing.KEY_CRITERIA_LOGICAL_OP_AND:
                    dropResultFlag = False
                else:
                    #This row doesn't match
                    dropResultFlag = True
                    break
                lastCheck = thisCheck
            #End of criteria check loop

            if not dropResultFlag:
                #Copy over to the new listing
                self.processed_results_list.append(item)

        self.__rowsFound = len(self.processed_results_list)

    def __checkResultsNull(self, criteria_item, item):
        """Check the criterion's null handling against the item's field.

        Returns True when nulls are allowed and the field is null, or when
        nulls are not allowed and the field has a value.
        """
        # NOTE(review): when nulls are allowed a populated field returns
        # False here, so such records can never reach __checkResultsMatch -
        # confirm that "field_include_null" is meant to select only nulls.
        allowNulls = criteria_item.getAllowNulls() == "field_include_null"
        fieldIsNull = item.get(criteria_item.getSolr_field()) is None
        if allowNulls:
            return fieldIsNull
        return not fieldIsNull

    def __checkResultsMatch(self, criteria_item, item):
        """Return True when any value of the item's field matches the criterion.

        "field_match" compares trimmed values ignoring case; any other
        matching operator is treated as a case-insensitive 'contains'.
        """
        if criteria_item.getAllowNulls() == "field_include_null":
            #If the query criteria allows nulls and the field is null, true
            if item.get(criteria_item.getSolr_field()) is None:
                return True

        #Some fields are lists so just handle lists
        solrvallist = ArrayList()
        solrval = item.getString(None, criteria_item.getSolr_field())
        if solrval is None:
            solrvallist = item.getList(criteria_item.getSolr_field())
        else:
            solrvallist.add(solrval)

        exactMatch = criteria_item.getMatchingOperator() == "field_match"
        for solrval in solrvallist:
            if exactMatch:
                #'equals' criteria: must be an exact (case-insensitive) match
                if String(String(solrval).trim()).equalsIgnoreCase(
                        String(criteria_item.getValue()).trim()):
                    return True
            else:
                #This is a contains search
                if solrval.lower().find(
                        criteria_item.getValue().lower()) != -1:
                    return True
        return False

    def getProcessedResultsList(self):
        """Return the results that passed __checkResults."""
        return self.processed_results_list

    def findDisplayLabel(self, csvValue):
        """Return the configured label for a CSV field name, else the name itself."""
        if self.fields is not None:
            for field in self.fields:
                if field.get("field-name") == csvValue:
                    return field.get("label")
        return csvValue

    def getErrorMsg(self):
        """Return the current error message ("" when there is none)."""
        return self.errorMsg

    def buildDashboard(self, context):
        """Keep a reference to the velocity context."""
        self.velocityContext = context

    def getReportResult(self):
        """Return the unfiltered results of the main Solr query."""
        return self.__reportResult.getResults()

    def getReportName(self):
        return self.report.getReportName()

    def getReportLabel(self):
        return self.report.getLabel()

    def urlEncode(self, text):
        """URL-encode ``text`` as UTF-8."""
        return URLEncoder.encode(text, "utf-8")

    def escapeHtml(self, value):
        """HTML-escape ``value``; falsy input or a falsy result gives ""."""
        if value:
            return StringEscapeUtils.escapeHtml(value) or ""
        return ""

    def getRowsFound(self):
        """Return the number of results that passed __checkResults."""
        return self.__rowsFound