def handleQuery(self, query, fieldName, formatStr):
    """Run a lookup query and write the distinct field values as a JSON array.

    query     -- query string handed to SearchRequest.
    fieldName -- name of the result field whose values are collected.
    formatStr -- %-style template rendered once per value with the mapping
                 {"hit": value}; it must produce one JSON array element.

    When ``self.term`` is not None only values containing it are kept.
    The response is written to ``self.writer``, which is closed before
    returning. If the search finds nothing, ``[""]`` is written instead.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest(query)
    req.setParam("fq", 'item_type:"object"')
    # addParam rather than a second setParam: other requests in this code
    # base combine setParam + addParam for several "fq" values, which implies
    # setParam replaces the earlier value and the item_type filter was lost.
    # NOTE(review): confirm against SearchRequest.setParam.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    if res.getNumFound() > 0:
        # A set, so a value shared by several records is emitted only once.
        hits = HashSet()
        for searchRes in res.getResults():
            for hit in searchRes.getList(fieldName):
                if self.term is None or hit.find(self.term) != -1:
                    hits.add(hit)
        self.writer.print("[")
        self.writer.print(",".join([formatStr % {"hit": hit} for hit in hits]))
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def export(self, exportType):
    """Stream every record matching the current facet as a download.

    exportType -- passed to the indexer as the "wt" parameter and also used
                  as the MIME subtype and the file extension of the download.

    A first zero-row query counts the matching records so the second query
    can request all of them. If the counting query fails, ``self.errorMsg``
    is set, the failure is logged and nothing is written to the response.
    """
    exportQuery = "%s:%s" % (self.facetField, self.facetFieldValue)
    # Was formatted with the builtin ``type``, which produced a bogus MIME
    # type such as "text/<type 'type'>".
    outputType = "text/%s; charset=UTF-8" % exportType
    responseHeader = "attachment; filename=%s.%s" % (self.facetFieldValue, exportType)

    try:
        out = ByteArrayOutputStream()
        recnumreq = SearchRequest(exportQuery)
        recnumreq.setParam("fl", "create_timestamp")
        recnumreq.setParam("rows", "0")
        self.indexer.search(recnumreq, out)
        recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
    except:
        # NOTE(review): the bare except is kept on purpose - under Jython,
        # Java exceptions from the indexer may not derive from Python's
        # Exception. Confirm before narrowing it.
        excType, excValue = sys.exc_info()[:2]
        self.errorMsg = "Export query failure. The issue has been logged (%s - %s)." % (excType, excValue)
        self.log.error("Export query threw an exception (package type was %s): %s - %s" % (self.facetFieldValue, excType, excValue))
        return

    out = ByteArrayOutputStream()
    req = SearchRequest(exportQuery)
    req.setParam("wt", exportType)
    req.setParam("rows", self.__rowsFoundSolr)
    self.indexer.search(req, out)

    self.response.setHeader("Content-Disposition", responseHeader)
    writer = self.response.getPrintWriter(outputType)
    writer.println(out.toString("UTF-8"))
    writer.close()
def handleWorkflowStep(self):
    """Write the distinct workflow step labels as a JSON array of objects.

    Each element has the form {"value": <label>, "label": <label>}. The
    response goes to ``self.writer``, which is closed before returning. If
    the search finds nothing, ``[""]`` is written instead.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest("workflow_step_label:[* TO *]")
    req.setParam("fq", 'item_type:"object"')
    # addParam rather than a second setParam: other requests in this code
    # base combine setParam + addParam for several "fq" values, which implies
    # setParam replaces the earlier value and the item_type filter was lost.
    # NOTE(review): confirm against SearchRequest.setParam.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    if res.getNumFound() > 0:
        # A set, so a label shared by several records is emitted only once.
        hits = HashSet()
        for doc in res.getResults():
            for label in doc.getList("workflow_step_label"):
                hits.add(label)

        self.writer.println("[")
        separator = ""
        for hit in hits:
            # NOTE(review): labels are interpolated without JSON escaping.
            self.writer.println(separator + "{\"value\": \"%s\",\n\"label\": \"%s\"}" % (hit, hit))
            separator = ","
        self.writer.println("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def handleQuery(self, query, fieldName, formatStr):
    """Search the index and emit the unique values of one field as JSON.

    query     -- query string handed to SearchRequest.
    fieldName -- result field whose values are gathered.
    formatStr -- %-style template applied to {"hit": value} to render each
                 array element.

    Values are filtered by substring match on ``self.term`` when it is set.
    Output goes to ``self.writer`` (closed on exit); an empty search writes
    ``[""]``.
    """
    out = ByteArrayOutputStream()
    req = SearchRequest(query)
    req.setParam("fq", 'item_type:"object"')
    # The second filter is added, not set: elsewhere in this code base
    # several "fq" values are combined with setParam followed by addParam,
    # so a repeated setParam would presumably discard the item_type filter.
    # NOTE(review): confirm against SearchRequest.setParam.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    if res.getNumFound() <= 0:
        self.writer.println("[\"\"]")
        self.writer.close()
        return

    term = self.term
    hits = HashSet()  # de-duplicates values shared between records
    for searchRes in res.getResults():
        for hit in searchRes.getList(fieldName):
            if term is None or hit.find(term) != -1:
                hits.add(hit)

    rendered = [formatStr % {"hit": hit} for hit in hits]
    self.writer.print("[")
    self.writer.print(",".join(rendered))
    self.writer.print("]")
    self.writer.close()
def handleGrantNumber(self):
    """Write the distinct grant numbers starting with ``self.term`` as JSON.

    The result is a JSON array of quoted strings written to ``self.writer``,
    which is closed before returning. If the search finds nothing,
    ``[""]`` is written instead.
    """
    out = ByteArrayOutputStream()
    # NOTE(review): self.term is interpolated into the query unescaped.
    req = SearchRequest("grant_numbers:%s*" % self.term)
    req.setParam("fq", 'item_type:"object"')
    # addParam rather than a second setParam: other requests in this code
    # base combine setParam + addParam for several "fq" values, which implies
    # setParam replaces the earlier value and the item_type filter was lost.
    # NOTE(review): confirm against SearchRequest.setParam.
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", "1000")
    self.indexer.search(req, out)
    res = SolrResult(ByteArrayInputStream(out.toByteArray()))

    if res.getNumFound() > 0:
        # A set, so a number shared by several records is emitted only once.
        hits = HashSet()
        for doc in res.getResults():
            for grantNumber in doc.getList("grant_numbers"):
                hits.add(grantNumber)

        self.writer.print("[")
        self.writer.print(",".join(["\"%s\"" % hit for hit in hits]))
        self.writer.print("]")
    else:
        self.writer.println("[\"\"]")
    self.writer.close()
def _searchSets(self, startPage=1):
    """Fetch one page of objects and annotate the result with paging info.

    startPage -- 1-based page number; the row offset sent to the indexer is
                 (startPage - 1) * records-per-page.

    Returns the SolrResult, with "lastPage" and "curPage" (both strings)
    added to its JSON object. Paging state is refreshed through
    ``self._setPaging`` using the number of records found.
    """
    request = SearchRequest(self.getQuery())
    request.setParam("fq", 'item_type:"object"')

    # Paging window.
    request.setParam("rows", str(self.getRecordsPerPage()))
    offset = (startPage - 1) * self.getRecordsPerPage()
    request.setParam("start", str(offset))

    request.addParam("fq", self.getFilterQuery())
    request.setParam("fl", self.getReturnFields())
    request.setParam("sort", "date_object_modified desc, f_dc_title asc")

    # Everyone except administrators is restricted by the security query.
    if not self.isAdmin():
        request.addParam("fq", self.getSecurityQuery())

    buf = ByteArrayOutputStream()
    self.indexer.search(request, buf)
    result = SolrResult(ByteArrayInputStream(buf.toByteArray()))

    self._setPaging(result.getNumFound())
    payload = result.getJsonObject()
    payload.put("lastPage", str(self.paging.getLastPage()))
    payload.put("curPage", str(startPage))
    return result
class ReportsData:
    """Page script exposing the most recent harvest-start events.

    The event list is only loaded for administrators; for any other user the
    getters report an empty result.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the page context, clear the session "fq" and run the search."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.log = context["log"]
        self.__harvestList = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``, or None (with a message) if unset."""
        value = self.velocityContext[index]
        if value is not None:
            return value
        # Printed rather than logged: vc() is called before self.log is set.
        print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def __search(self):
        """Load up to 100 harvestStart events from the "eventLog" index."""
        indexer = self.services.getIndexer()

        # Security prep work
        isAdmin = self.vc("page").authentication.is_admin()
        if not isAdmin:
            # Goes to the page logger instead of stdout.
            self.log.error("ERROR: User is not an admin '")
            return None

        req = SearchRequest('eventType:harvestStart')
        req.setParam("rows", "100")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__harvestList = SolrResult(ByteArrayInputStream(out.toByteArray()))

    def getHarvestlist(self):
        """Return the harvest events, or an empty list if none were loaded."""
        # The list is never loaded for non-admin users; this used to raise
        # AttributeError on None.
        if self.__harvestList is None:
            return []
        return self.__harvestList.getResults()

    def getItemCount(self):
        """Return the number of harvest events found, or 0 if none were loaded."""
        if self.__harvestList is None:
            return 0
        return self.__harvestList.getNumFound()
class ReportsData:
    """Report page data: recent "harvestStart" entries from the event log.

    Only administrators get the event list; other users see an empty report.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the context, drop the session "fq" and load the events."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.log = context["log"]
        self.__harvestList = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Look up ``index`` in the context; returns None if it is unset."""
        entry = self.velocityContext[index]
        if entry is None:
            # Uses print because self.log is not assigned yet on first use.
            print("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None
        return entry

    def __search(self):
        """Query the "eventLog" index for up to 100 harvestStart events."""
        indexer = self.services.getIndexer()

        # Security prep work
        if not self.vc("page").authentication.is_admin():
            # Reported through the page logger instead of stdout.
            self.log.error("ERROR: User is not an admin '")
            return None

        req = SearchRequest('eventType:harvestStart')
        req.setParam("rows", "100")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__harvestList = SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __loaded(self):
        """Tell whether __search actually produced a result."""
        return self.__harvestList is not None

    def getHarvestlist(self):
        """Return the harvest events; empty when the search was not run."""
        # Previously raised AttributeError for non-admin users, for whom the
        # result is left as None.
        if not self.__loaded():
            return []
        return self.__harvestList.getResults()

    def getItemCount(self):
        """Return the number of events found; 0 when the search was not run."""
        if not self.__loaded():
            return 0
        return self.__harvestList.getNumFound()
class HomeData:
    """Home page data: workflow stage counts, alerts and latest live records."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the page context, clear the session "fq" and run all searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.__latest = None
        self.__steps = None
        self.__alerts = None
        self.__result = None
        self.__stages = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; logs an error and returns None if unset."""
        value = self.velocityContext[index]
        if value is None:
            self.velocityContext["log"].error(
                "ERROR: Requested context entry '{}' doesn't exist", index)
            return None
        return value

    def __newRequest(self, query, filters):
        """Create a request limited to objects, plus the given filter queries in order."""
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for fq in filters:
            req.addParam("fq", fq)
        return req

    def __run(self, indexer, req):
        """Execute ``req`` on the indexer and wrap the raw response."""
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __search(self):
        """Populate the stage list, total result, alerts and latest records."""
        indexer = Services.getIndexer()
        portal = Services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = auth.is_admin()

        # Filters shared by every request, kept in their original order:
        # portal filters first, the security filter (non-admins only) last.
        portalFilters = [fq for fq in (portalQuery, portalSearchQuery) if fq]
        if isAdmin:
            securityFilters = []
        else:
            securityFilters = [security_query]

        # 1) Facet counts per workflow step (no rows needed).
        req = self.__newRequest("*:*", portalFilters + [""] + securityFilters)
        req.setParam("rows", "0")
        req.setParam("facet", "true")
        req.setParam("facet.field", "workflow_step")
        self.__steps = self.__run(indexer, req).getFacets().get("workflow_step")

        # 2) One WorkflowStage per configured stage of the dataset workflow.
        wfConfig = JsonSimple(
            FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
        stages = [WorkflowStage(jsonStage, self.__steps)
                  for jsonStage in wfConfig.getJsonSimpleList(["stages"])]
        self.__stages = stages

        # 3) Everything visible to the user, newest first.
        req = self.__newRequest("*:*", portalFilters + [""] + securityFilters)
        req.setParam("rows", "25")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        self.__result = self.__run(indexer, req)

        # 4) Alerts: the same request narrowed to the first workflow stage.
        req.addParam("fq", "workflow_step:%s" % stages[0].getName())
        self.__alerts = self.__run(indexer, req)

        # 5) Live records modified within the last month.
        req = self.__newRequest(
            "last_modified:[NOW-1MONTH TO *] AND workflow_step:live",
            portalFilters + securityFilters)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        self.__latest = self.__run(indexer, req)

        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        """Return the latest live records."""
        return self.__latest.getResults()

    def getAlerts(self):
        """Return the records sitting in the first workflow stage."""
        return self.__alerts.getResults()

    def getItemCount(self):
        """Return the total number of records visible to the user."""
        return self.__result.getNumFound()

    def getStages(self):
        """Return the list of WorkflowStage objects."""
        return self.__stages
class SearchData:
    """Search page script: builds the search request, runs it and exposes
    the result, facets, paging and packaging helpers to the template.

    Navigation state (facet filters, page number) comes either from the
    session ("session navigation") or from the request URI (RESTful style),
    depending on the portal's "use-session-navigation" setting.
    """

    def __activate__(self, context):
        """Bind the page context, resolve sort order and run the search."""
        self.services = context["Services"]
        self.page = context["page"]
        self.formData = context["formData"]
        self.portalId = context["portalId"]
        self.sessionState = context["sessionState"]
        self.request = context["request"]
        self.pageName = context["pageName"]
        self.log = context["log"]

        self.__portal = context["page"].getPortal()
        self.__useSessionNavigation = self.__portal.getBoolean(
            True, ["portal", "use-session-navigation"])
        self.__result = None
        if self.__useSessionNavigation:
            self.__pageNum = self.sessionState.get("pageNum", 1)
        else:
            self.__pageNum = 1
        self.__selected = ArrayList()
        self.__fqParts = []
        self.__searchField = self.formData.get("searchField", "full_text")

        self.__sortField = self.formData.get("sort-field")
        self.__sortOrder = self.formData.get("sort-order")
        if not (self.__sortField or self.__sortOrder):
            # Form data not specified: fall back to the session, then to the
            # portal defaults.
            sortField = self.__portal.sortFieldDefault or "score"
            sortOrder = self.__portal.sortFieldDefaultOrder or "desc"
            self.__sortField = self.sessionState.get("sortField", sortField)
            self.__sortOrder = self.sessionState.get("sortOrder", sortOrder)
        self.sessionState.set("sortField", self.__sortField)
        self.sessionState.set("sortOrder", self.__sortOrder)
        self.__sortBy = "%s %s" % (self.__sortField, self.__sortOrder)

        # reset the query and facet selections when changing views
        lastPortalId = self.sessionState.get("lastPortalId")
        if lastPortalId != self.portalId:
            self.log.info(
                "Portal changed. Resetting saved search values in session.")
            self.sessionState.remove("fq")
            self.sessionState.remove("pageNum")
            self.sessionState.remove("sortField")
            self.sessionState.remove("sortOrder")
            self.__pageNum = 1
        self.sessionState.set("lastPortalId", self.portalId)

        self.__search()

    def usingSessionNavigation(self):
        """Return whether facet/page state is kept in the session."""
        # Previously returned self.__restful, which is never assigned.
        return self.__useSessionNavigation

    def getPortalName(self):
        return self.__portal.getDescription()

    def getSearchField(self):
        return self.__searchField

    def __search(self):
        """Build and execute the main search, storing result and paging."""
        requireEscape = False
        recordsPerPage = self.__portal.recordsPerPage
        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        query = None
        pagePath = self.__portal.getName() + "/" + self.pageName
        if query is None or query == "":
            query = self.formData.get("query")
            requireEscape = True
        if query is None or query == "":
            query = "*:*"

        if query == "*:*":
            self.__query = ""
        else:
            self.__query = query
            if requireEscape:
                query = self.__escapeQuery(query)
            query = "%s:%s" % (self.__searchField, query)
        self.sessionState.set("query", self.__query)

        # find objects with annotations matching the query
        if query != "*:*":
            anotarQuery = self.__query
            if requireEscape:
                anotarQuery = self.__escapeQuery(anotarQuery)
            annoReq = SearchRequest(anotarQuery)
            annoReq.setParam("facet", "false")
            annoReq.setParam("rows", str(99999))
            annoReq.setParam("sort", "dateCreated asc")
            annoReq.setParam("start", str(0))
            anotarOut = ByteArrayOutputStream()
            self.services.indexer.annotateSearch(annoReq, anotarOut)
            resultForAnotar = SolrResult(
                ByteArrayInputStream(anotarOut.toByteArray()))
            resultForAnotar = resultForAnotar.getResults()
            ids = HashSet()
            for annoDoc in resultForAnotar:
                annotatesUri = annoDoc.getFirst("annotatesUri")
                ids.add(annotatesUri)
                self.log.debug("Found annotation for %s" % annotatesUri)
            # add annotation ids to query
            query += ' OR id:("' + '" OR "'.join(ids) + '")'

        portalSearchQuery = self.__portal.searchQuery
        if portalSearchQuery == "":
            portalSearchQuery = query
        else:
            if query != "*:*":
                query += " AND " + portalSearchQuery
            else:
                query = portalSearchQuery

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", self.__portal.facetFieldList)
        req.setParam("facet.sort",
                     Boolean.toString(self.__portal.getFacetSort()))
        req.setParam("facet.limit", str(self.__portal.facetCount))
        req.setParam("sort", self.__sortBy)

        # setup facets
        if self.__useSessionNavigation:
            action = self.formData.get("verb")
            value = self.formData.get("value")
            fq = self.sessionState.get("fq")
            if fq is not None:
                self.__pageNum = 1
                req.setParam("fq", fq)
            if action == "add_fq":
                self.__pageNum = 1
                req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "remove_fq":
                self.__pageNum = 1
                req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "clear_fq":
                self.__pageNum = 1
                req.removeParam("fq")
            elif action == "select-page":
                self.__pageNum = int(value)
        else:
            # RESTful navigation: page and facets are encoded in the URI
            # after the page path.
            navUri = uri[len(pagePath):]
            self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
            savedfq = self.sessionState.get("savedfq")
            limits = []
            if savedfq:
                limits.extend(savedfq)
            if fq:
                limits.extend(fq)
                self.sessionState.set("savedfq", limits)
                for q in fq:
                    req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

        portalQuery = self.__portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", 'item_type:"object"')
        if req.getParams("fq"):
            self.__selected = ArrayList(req.getParams("fq"))
        if self.__useSessionNavigation:
            self.sessionState.set("fq", self.__selected)
        self.sessionState.set("searchQuery", portalSearchQuery)
        self.sessionState.set("pageNum", self.__pageNum)

        # Make sure 'fq' has already been set in the session: the security
        # filter below must not end up in the stored selection.
        if not self.page.authentication.is_admin():
            current_user = self.page.authentication.get_username()
            security_roles = self.page.authentication.get_roles_list()
            security_filter = 'security_filter:("' + '" OR "'.join(
                security_roles) + '")'
            security_exceptions = 'security_exception:"' + current_user + '"'
            owner_query = 'owner:"' + current_user + '"'
            security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
            req.addParam("fq", security_query)

        ## uncomment to ensure guest users not logged in cannot see alerts in browse page
        # self.filterOutWorkflowStepForUnAuth(req, "inbox")

        req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

        self.log.debug(" * search.py: %s, page: %s" % (req.toString(), self.__pageNum))
        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
        if self.__result is not None:
            self.__paging = Pagination(self.__pageNum,
                                       self.__result.getNumFound(),
                                       self.__portal.recordsPerPage)

    def filterOutWorkflowStepForUnAuth(self, req, workflow_step):
        """Add a filter for ``workflow_step`` to ``req`` for anonymous guests."""
        if self.page.authentication.get_username(
        ) == 'guest' and not self.page.authentication.is_logged_in():
            step_filter = [workflow_step]
            no_alerts_for_non_auth = ' - workflow_step:("' + ''.join(
                step_filter) + '")'
            req.addParam("fq", no_alerts_for_non_auth)

    def __escapeQuery(self, q):
        """Return ``q`` with every query-syntax special character backslash-escaped."""
        chars = "+-&|!(){}[]^\"~*?:\\"
        escaped = []
        for c in q:
            if c in chars:
                # Explicit backslash; the former "\%s" relied on an invalid
                # escape sequence being passed through unchanged.
                escaped.append("\\" + c)
            else:
                escaped.append(c)
        return "".join(escaped)

    def getQueryTime(self):
        """Return the query time divided by 1000 (as a float)."""
        return int(self.__result.getQueryTime()) / 1000.0

    def getPaging(self):
        return self.__paging

    def getResult(self):
        return self.__result

    def getFacetField(self, key):
        return self.__portal.facetFields.get(key)

    def getFacetName(self, key):
        return self.__portal.facetFields.get(key).getString(None, ["label"])

    def getFacetCounts(self, key):
        """Return the facet values for ``key``; an empty map when unavailable.

        Facet counts are only reported under session navigation.
        """
        if self.__useSessionNavigation:
            facetData = self.__result.getFacets()
            if facetData is None:
                return LinkedHashMap()
            if not facetData.containsKey(key):
                return LinkedHashMap()
            return facetData.get(key).values()
        else:
            return LinkedHashMap()

    def getFacetDisplay(self):
        return self.__portal.facetDisplay

    def hasSelectedFacets(self):
        """Return True when more than one filter is selected, unless the only
        two selected are the portal query and one other."""
        return (self.__selected is not None and len(self.__selected) > 1) and \
            not (self.__portal.query in self.__selected and len(self.__selected) == 2)

    def getSelectedFacets(self):
        return self.__selected

    def isPortalQueryFacet(self, fq):
        return fq == self.__portal.query

    def isSelected(self, fq):
        return fq in self.__selected

    def getSelectedFacetIds(self):
        """Return MD5 hex ids of the selected filters as a native Python list."""
        ## Python has indicator of u' for unicode string which causes JS complain.
        ## Because MD5 hash is made from letters we can encode them in ascii
        return [
            DigestUtils.md5Hex(fq).encode("ascii", "ignore")
            for fq in self.__selected
        ]

    def getFileName(self, path):
        """Return the base name of ``path`` without its extension."""
        return os.path.splitext(os.path.basename(path))[0]

    def getFacetQuery(self, name, value):
        return '%s:"%s"' % (name, value)

    # Packaging support
    def getActiveManifestTitle(self):
        return self.__getActiveManifest().getTitle()

    def getActiveManifestId(self):
        return self.sessionState.get("package/active/id")

    def getSelectedItemsCount(self):
        return self.__getActiveManifest().size()

    def isSelectedForPackage(self, oid):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        return (result is not None)

    def getManifestItemTitle(self, oid, defaultValue):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        if result is None:
            return defaultValue
        return result.getTitle()

    def __getActiveManifest(self):
        """Return the session's active manifest, creating and storing one if absent."""
        activeManifest = self.sessionState.get("package/active")
        if not activeManifest:
            activeManifest = Manifest(None)
            activeManifest.setTitle("New package")
            activeManifest.setViewId(self.__portal.getName())
            self.sessionState.set("package/active", activeManifest)
        return activeManifest

    def isSelectableForPackage(self, oid):
        return oid != self.getActiveManifestId()

    def getSortFields(self):
        return self.__portal.sortFields

    def getSortField(self):
        return self.__sortField

    def getSortOrder(self):
        return self.__sortOrder

    # RESTful style URL support methods
    def getPageQuery(self, page):
        """Return the URI suffix for ``page`` under the current facet selection."""
        prefix = ""
        if self.__fqParts:
            prefix = "/" + "/".join(self.__fqParts)
        suffix = ""
        if page > 1:
            suffix = "/page/%s" % page
        return prefix + suffix

    def getFacetQueryUri(self, name, value):
        return "%s/%s" % (name, value)

    def getFacetValue(self, facetValue):
        """Return the last "/"-separated segment of ``facetValue``."""
        return facetValue.split("/")[-1]

    def getLimitQueryWith(self, fq):
        limits = ArrayList(self.__fqParts)
        limits.add("category/" + fq)
        return "/".join(limits)

    def getFacetIndent(self, facetValue):
        """Return the number of "/"-separated segments in ``facetValue``."""
        return len(facetValue.split("/"))

    def getLimitQueryWithout(self, fq):
        limits = ArrayList(self.__fqParts)
        limits.remove("category/" + fq)
        if limits.isEmpty():
            return ""
        return "/".join(limits)

    def __parseUri(self, uri):
        """Parse a navigation URI of the form
        ``/category/<key>/<value...>/page/<n>``.

        Returns a tuple (page, fq, fqParts): the page number (default 1), the
        facet queries ``key:"value"`` and the matching
        ``category/key/value`` URI fragments.
        """
        page = 1
        fq = []
        fqParts = []
        if uri != "":
            parts = uri.split("/")
            partType = None
            facetKey = None
            facetValues = None
            for part in parts:
                if partType == "page":
                    facetKey = None
                    page = int(part)
                elif partType == "category":
                    # The segment after "category" is the facet key; the
                    # value segments follow.
                    partType = "category-value"
                    facetValues = None
                    facetKey = part
                elif partType == "category-value":
                    if facetValues is None:
                        facetValues = []
                    if part in ["page", "category"]:
                        # A keyword ends the current facet value.
                        partType = part
                        facetQuery = '%s:"%s"' % (facetKey,
                                                  "/".join(facetValues))
                        fq.append(facetQuery)
                        fqParts.append("category/%s/%s" %
                                       (facetKey, "/".join(facetValues)))
                        facetKey = None
                        facetValues = None
                    else:
                        facetValues.append(URLDecoder.decode(part))
                else:
                    partType = part
            # Flush a facet that runs to the end of the URI.
            if partType == "category-value":
                facetQuery = '%s:"%s"' % (facetKey, "/".join(facetValues))
                fq.append(facetQuery)
                fqParts.append("category/%s/%s" %
                               (facetKey, "/".join(facetValues)))
        return page, fq, fqParts
class HomeData:
    """Home page data: latest records, the user's own records, records in the
    user's workflows, a total count and the services UI workflow stages."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the page context, clear the session "fq" and run all searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None
        # NOTE(review): __steps is never populated in this class, so every
        # WorkflowStage below is built with None - confirm that is intended.
        self.__steps = None
        self.__selfservicesStages = None
        self.__search()

    # Get from velocity context Mint version
    def vc(self, index):
        """Return the context entry ``index``, or None (with a message) if unset."""
        value = self.velocityContext[index]
        if value is not None:
            return value
        print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def __newRequest(self, query, filters):
        """Create a request limited to objects, plus the given filter queries in order."""
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for fq in filters:
            req.addParam("fq", fq)
        return req

    def __run(self, indexer, req):
        """Execute ``req`` on the indexer and wrap the raw response."""
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __search(self):
        """Populate the latest/mine/workflows lists, total count and stages."""
        indexer = self.services.getIndexer()
        portal = self.services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = auth.is_admin()

        # Filters shared by every request: portal filters first, the security
        # filter (non-admins only) last.
        portalFilters = [fq for fq in (portalQuery, portalSearchQuery) if fq]
        if isAdmin:
            securityFilters = []
        else:
            securityFilters = [security_query]

        # The three top-10 lists differ only in their main query.
        listQueries = [
            "last_modified:[NOW-1MONTH TO *]",
            owner_query,
            'workflow_security:"' + current_user + '"',
        ]
        listResults = []
        for query in listQueries:
            req = self.__newRequest(query, portalFilters + securityFilters)
            req.setParam("rows", "10")
            req.setParam("sort", "last_modified desc, f_dc_title asc")
            listResults.append(self.__run(indexer, req))
        self.__latest, self.__mine, self.__workflows = listResults

        # Total number of records visible to the user (no rows needed).
        req = self.__newRequest("*:*", portalFilters + [""] + securityFilters)
        req.setParam("rows", "0")
        self.__result = self.__run(indexer, req)

        self.vc("sessionState").set("fq", 'item_type:"object"')

        # Load in the services UI workflow
        selfSubmitWfConfig = JsonSimple(FascinatorHome.getPathFile("harvest/workflows/servicesUI.json"))
        self.__selfservicesStages = [
            WorkflowStage(jsonStage, self.__steps)
            for jsonStage in selfSubmitWfConfig.getJsonSimpleList(["stages"])
        ]

    def getLatest(self):
        """Return the records modified within the last month."""
        return self.__latest.getResults()

    def getMine(self):
        """Return the records owned by the current user."""
        return self.__mine.getResults()

    def getWorkflows(self):
        """Return the records whose workflow_security names the current user."""
        return self.__workflows.getResults()

    def getItemCount(self):
        """Return the total number of records visible to the user."""
        return self.__result.getNumFound()

    def getServicesStages(self):
        """Return the services UI workflow stages."""
        # Previously read self.__servicesStages, which is never assigned.
        return self.__selfservicesStages
class HomeData:
    """Home page data: workflow stage counts, alerts, latest live records and
    embargoed records awaiting final review."""

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the page context, clear the session "fq" and run all searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.__latest = None
        self.__steps = None
        self.__alerts = None
        self.__result = None
        self.__stages = None
        self.__embargoes = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; logs an error and returns None if unset."""
        entry = self.velocityContext[index]
        if entry is None:
            self.velocityContext["log"].error("ERROR: Requested context entry '{}' doesn't exist", index)
            return None
        return entry

    def __objectRequest(self, query, filters):
        """Create a request limited to objects, plus the given filter queries in order."""
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for fq in filters:
            req.addParam("fq", fq)
        return req

    def __fetch(self, indexer, req):
        """Execute ``req`` on the indexer and wrap the raw response."""
        buf = ByteArrayOutputStream()
        indexer.search(req, buf)
        return SolrResult(ByteArrayInputStream(buf.toByteArray()))

    def __search(self):
        """Populate stages, total result, alerts, latest records and embargoes."""
        indexer = Services.getIndexer()
        portal = Services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = auth.is_admin()

        # Common filters in their original order: portal filters first, the
        # security filter (non-admins only) last.
        commonHead = [fq for fq in (portalQuery, portalSearchQuery) if fq]
        commonTail = []
        if not isAdmin:
            commonTail.append(security_query)

        # Facet counts per workflow step.
        req = self.__objectRequest("*:*", commonHead + [""] + commonTail)
        req.setParam("rows", "0")
        req.setParam("facet", "true")
        req.setParam("facet.field", "workflow_step")
        self.__steps = self.__fetch(indexer, req).getFacets().get("workflow_step")

        # One WorkflowStage per configured stage of the dataset workflow.
        wfConfig = JsonSimple(FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
        stages = []
        for jsonStage in wfConfig.getJsonSimpleList(["stages"]):
            stages.append(WorkflowStage(jsonStage, self.__steps))
        self.__stages = stages

        # Everything visible to the user, newest first.
        req = self.__objectRequest("*:*", commonHead + [""] + commonTail)
        req.setParam("rows", "25")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        self.__result = self.__fetch(indexer, req)

        # Alerts reuse that request, narrowed to the first workflow stage.
        req.addParam("fq", "workflow_step:%s" % stages[0].getName())
        self.__alerts = self.__fetch(indexer, req)

        # Live records modified within the last month.
        req = self.__objectRequest(
            "last_modified:[NOW-1MONTH TO *] AND workflow_step:live",
            commonHead + commonTail)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        self.__latest = self.__fetch(indexer, req)

        self._searchEmbargoes()
        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        """Return the latest live records."""
        return self.__latest.getResults()

    def getAlerts(self):
        """Return the records sitting in the first workflow stage."""
        return self.__alerts.getResults()

    def getItemCount(self):
        """Return the total number of records visible to the user."""
        return self.__result.getNumFound()

    def getStages(self):
        """Return the list of WorkflowStage objects."""
        return self.__stages

    def getEmbargoes(self):
        """Return the embargoed records found by _searchEmbargoes."""
        return self.__embargoes.getResults()

    def _searchEmbargoes(self):
        """Load up to 25 embargoed records in final review, earliest embargo first."""
        req = SearchRequest("item_type:object")
        # Raw string: the backslashes are part of the field name sent to the
        # indexer.
        req.setParam("fq", r'redbox\:embargo.redbox\:isEmbargoed:on')
        req.addParam("fq", 'workflow_step:final-review')
        req.addParam("fq", "")
        req.setParam("fl", "id,date_embargoed,dc_title")
        req.setParam("rows", "25")
        req.setParam("sort", "date_embargoed asc, dc_title asc")
        buf = ByteArrayOutputStream()
        Services.getIndexer().search(req, buf)
        self.__embargoes = SolrResult(ByteArrayInputStream(buf.toByteArray()))
        self.velocityContext["log"].info("searchEmbargoes call ended" + str(self.__embargoes))

    def formatDate(self, date):
        """Convert a "yyyy-MM-dd'T'HH:mm:ss" date string to "dd/MM/yyyy"."""
        parsed = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").parse(date)
        return SimpleDateFormat("dd/MM/yyyy").format(parsed)
class OaiData:
    """Page data object backing the OAI-PMH endpoint.

    On activation it validates the incoming request (via OaiPmhVerb), resolves
    the portal ('set') and resumption token, and - for the record-listing
    verbs - runs the Solr search whose result the templates render.
    """

    def __init__(self):
        # Created lazily on first activation because it needs the context
        self.tokensDB = None

    def __activate__(self, context):
        """Process one OAI-PMH request described by the scripting context."""
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.lastPage = False

        # Check if the OAI request has an overriding portal ('set') in the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData. The value is
        # client supplied and has the form '<token>:<start>'.
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken is not None:
            tokenParts = resumptionToken.strip().split(":")
            if len(tokenParts) == 2 and tokenParts[1].isdigit():
                # This could still be null; a null token is dealt with by
                # the request parsing below.
                self.__currentToken = self.tokensDB.getToken(tokenParts[0])
                if self.__currentToken is not None:
                    self.__currentToken.setStart(tokenParts[1])
            else:
                # A malformed token used to blow up on tuple unpacking (or
                # later on int()); treat it like an unknown token instead.
                self.log.debug("Ignoring malformed resumption token: '{}'",
                               resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument",
                                    "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        # data (like a search result) do so now. Everything else can be
        # handled in the templates.
        searchVerbs = ["GetRecord", "ListIdentifiers", "ListRecords"]
        if self.getError() is None and self.getVerb() in searchVerbs:
            # Find the metadata prefix requested, falling back to the one
            # remembered by the resumption token (when there is one).
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            if self.__metadataPrefix is None and self.__currentToken is not None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            # Only list records if the metadata format is enabled in this view
            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry `index`, logging an error if it is None."""
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None

    def isInView(self, format, view=None):
        """Return True if metadata `format` is enabled for `view`.

        `view` defaults to the portal this request resolved to.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True

        # Otherwise it must be explicitly enabled for this view
        return view in formatConfig.getStringList(["enabledViews"])

    def getID(self, item):
        """Return the OAI identifier of `item`, defaulting to one built from its id."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator.usq.edu.au:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        """Return the truthiness of the item's 'oai_deleted' field."""
        # NOTE(review): any non-empty value (even the text "false") counts
        # as deleted here - confirm what the indexer stores.
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Return the item's 'oai_set', defaulting to the current portal name."""
        oaiSet = item.getFirst("oai_set")
        # Fallback to the portal name
        if oaiSet is None or oaiSet == "":
            return self.__portalName
        # Use the required set
        return oaiSet

    def getVerb(self):
        return self.getRequest().getVerb()

    def getError(self):
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp (trailing 'Z')."""
        # The 'Z' suffix denotes UTC, so format UTC rather than local time.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        return self.__request

    def getResult(self):
        return self.__result

    def getElement(self, elementName, values):
        """Return one <elementName>value</elementName> element per value, concatenated.

        Values are inserted as-is (no XML escaping); an empty/None `values`
        gives an empty string.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, q):
        """Backslash-escape the Solr/Lucene query special characters in `q`."""
        specialChars = "+-&|!(){}[]^\"~*?:\\"
        escaped = []
        for c in q:
            if c in specialChars:
                escaped.append("\\" + c)
            else:
                escaped.append(c)
        return "".join(escaped)

    def __search(self):
        """Run the Solr search for this request and maintain the resumption token."""
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""
        query = "*:*"
        if hasIdentifier:
            # A default TF2 OID
            if identifier.startswith("oai:fascinator.usq.edu.au:"):
                idString = identifier.replace("oai:fascinator.usq.edu.au:", "")
                query = "id:" + self.__escapeQuery(idString)
            # Or a custom OAI ID
            else:
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Date range... if supplied. A missing bound is left open ('*').
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        fromStr = "*"
        untilStr = "*"
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
        if untilDate is not None:
            untilStr = untilDate.isoformat() + "Z"
            self.log.debug("Until Date: '{}'", untilStr)
        if fromDate is not None or untilDate is not None:
            queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)

        # Continue from an existing resumption token...
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # ... or start a new resumption token
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix,
                                       self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist",
                                        "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch",
                                        "No records match this request")
        # We need to store the total for NEW tokens
        elif self.__currentToken is None:
            # Assuming there are enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        # Check if we need to remove the resumption token
        else:
            if (start + recordsPerPage) >= totalFound:
                self.tokensDB.removeToken(self.__currentToken)
                self.lastPage = True

        # Store/update the resumption token
        if newToken is not None:
            # Brand new token
            if self.__currentToken is None:
                self.tokensDB.storeToken(newToken)
            # Or update an old token
            else:
                self.tokensDB.updateToken(newToken)

        self.__currentToken = newToken

    def getToken(self):
        """Return the resumption token to emit, or None when there isn't one."""
        if self.isInView(self.__metadataPrefix) and not self.lastPage:
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the configured OAI-PMH metadata formats (loaded once per activation)."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload `metadataFileName` of object `oid`, or None on storage errors."""
        # First get the Object from storage
        try:
            digitalObject = self.services.getStorage().getObject(oid)
        except StorageException:
            return None

        # Check whether the payload exists
        try:
            return digitalObject.getPayload(metadataFileName)
        except StorageException:
            return None
class SearchData:
    """Page data object for the portal search page.

    Builds and runs the Solr query from form/session/URL state, and exposes
    the result, facets, paging, sorting and packaging helpers to the
    templates.
    """

    def __activate__(self, context):
        """Read request/session state from `context` and run the search."""
        self.services = context["Services"]
        self.page = context["page"]
        self.formData = context["formData"]
        self.portalId = context["portalId"]
        self.sessionState = context["sessionState"]
        self.request = context["request"]
        self.pageName = context["pageName"]
        self.log = context["log"]

        self.__portal = context["page"].getPortal()
        self.__useSessionNavigation = self.__portal.getBoolean(
            True, ["portal", "use-session-navigation"])
        self.__result = None
        if self.__useSessionNavigation:
            self.__pageNum = self.sessionState.get("pageNum", 1)
        else:
            self.__pageNum = 1
        self.__selected = ArrayList()
        self.__fqParts = []
        self.__searchField = self.formData.get("searchField", "full_text")

        self.__sortField = self.formData.get("sort-field")
        self.__sortOrder = self.formData.get("sort-order")
        if not (self.__sortField or self.__sortOrder):
            # Sorting not specified in the form data: check the session,
            # then fall back to the portal defaults.
            sortField = self.__portal.sortFieldDefault or "score"
            sortOrder = self.__portal.sortFieldDefaultOrder or "desc"
            self.__sortField = self.sessionState.get("sortField", sortField)
            self.__sortOrder = self.sessionState.get("sortOrder", sortOrder)
        self.sessionState.set("sortField", self.__sortField)
        self.sessionState.set("sortOrder", self.__sortOrder)
        self.__sortBy = "%s %s" % (self.__sortField, self.__sortOrder)

        # reset the query and facet selections when changing views
        lastPortalId = self.sessionState.get("lastPortalId")
        if lastPortalId != self.portalId:
            self.sessionState.remove("fq")
            self.sessionState.remove("pageNum")
            self.sessionState.remove("sortField")
            self.sessionState.remove("sortOrder")
            self.__pageNum = 1
        self.sessionState.set("lastPortalId", self.portalId)

        self.__search()

    def usingSessionNavigation(self):
        """Return True when facets/paging are tracked in the session (not the URL)."""
        # Previously returned self.__restful, which is never assigned.
        return self.__useSessionNavigation

    def getPortalName(self):
        return self.__portal.getDescription()

    def getSearchField(self):
        return self.__searchField

    def __search(self):
        """Build the Solr request from the current state and execute it."""
        recordsPerPage = self.__portal.recordsPerPage

        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        pagePath = self.__portal.getName() + "/" + self.pageName

        # The query always comes from the form data and is always escaped.
        query = self.formData.get("query")
        if query is None or query == "":
            query = "*:*"

        if query == "*:*":
            self.__query = ""
        else:
            self.__query = query
            query = "%s:%s" % (self.__searchField, self.__escapeQuery(query))
        self.sessionState.set("query", self.__query)

        # find objects with annotations matching the query
        if query != "*:*":
            anotarQuery = self.__escapeQuery(self.__query)
            annoReq = SearchRequest(anotarQuery)
            annoReq.setParam("facet", "false")
            annoReq.setParam("rows", str(99999))
            annoReq.setParam("sort", "dateCreated asc")
            annoReq.setParam("start", str(0))
            anotarOut = ByteArrayOutputStream()
            self.services.indexer.annotateSearch(annoReq, anotarOut)
            resultForAnotar = SolrResult(
                ByteArrayInputStream(anotarOut.toByteArray()))
            resultForAnotar = resultForAnotar.getResults()
            ids = HashSet()
            for annoDoc in resultForAnotar:
                annotatesUri = annoDoc.getFirst("annotatesUri")
                ids.add(annotatesUri)
                self.log.debug("Found annotation for %s" % annotatesUri)
            # add annotation ids to query
            query += ' OR id:("' + '" OR "'.join(ids) + '")'

        portalSearchQuery = self.__portal.searchQuery
        if portalSearchQuery == "":
            portalSearchQuery = query
        else:
            if query != "*:*":
                query += " AND " + portalSearchQuery
            else:
                query = portalSearchQuery

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", self.__portal.facetFieldList)
        req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
        req.setParam("facet.limit", str(self.__portal.facetCount))
        req.setParam("sort", self.__sortBy)

        # setup facets
        if self.__useSessionNavigation:
            action = self.formData.get("verb")
            value = self.formData.get("value")
            fq = self.sessionState.get("fq")
            if fq is not None:
                self.__pageNum = 1
                req.setParam("fq", fq)
            if action == "add_fq":
                self.__pageNum = 1
                req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "remove_fq":
                self.__pageNum = 1
                req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "clear_fq":
                self.__pageNum = 1
                req.removeParam("fq")
            elif action == "select-page":
                self.__pageNum = int(value)
        else:
            # RESTful navigation: page and facets are encoded in the URL
            navUri = uri[len(pagePath):]
            self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
            savedfq = self.sessionState.get("savedfq")
            limits = []
            if savedfq:
                limits.extend(savedfq)
            if fq:
                limits.extend(fq)
            self.sessionState.set("savedfq", limits)
            for q in fq:
                req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

        portalQuery = self.__portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", 'item_type:"object"')
        if req.getParams("fq"):
            self.__selected = ArrayList(req.getParams("fq"))

        if self.__useSessionNavigation:
            self.sessionState.set("fq", self.__selected)
        self.sessionState.set("searchQuery", portalSearchQuery)
        self.sessionState.set("pageNum", self.__pageNum)

        # The security filter is added only AFTER 'fq' has been stored in
        # the session, so it is not part of the stored selection.
        if not self.page.authentication.is_admin():
            current_user = self.page.authentication.get_username()
            security_roles = self.page.authentication.get_roles_list()
            security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
            security_exceptions = 'security_exception:"' + current_user + '"'
            owner_query = 'owner:"' + current_user + '"'
            security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
            req.addParam("fq", security_query)

        req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
        if self.__result is not None:
            self.__paging = Pagination(self.__pageNum,
                                       self.__result.getNumFound(),
                                       self.__portal.recordsPerPage)

    def __escapeQuery(self, q):
        """Backslash-escape the Solr/Lucene query special characters in `q`."""
        specialChars = "+-&|!(){}[]^\"~*?:\\"
        escaped = []
        for c in q:
            if c in specialChars:
                escaped.append("\\" + c)
            else:
                escaped.append(c)
        return "".join(escaped)

    def getQueryTime(self):
        """Return the result's query time divided by 1000."""
        return int(self.__result.getQueryTime()) / 1000.0

    def getPaging(self):
        return self.__paging

    def getResult(self):
        return self.__result

    def getFacetField(self, key):
        return self.__portal.facetFields.get(key)

    def getFacetName(self, key):
        return self.__portal.facetFields.get(key).getString(None, ["label"])

    def getFacetCounts(self, key):
        """Return the facet values for `key`; an empty map if there are none.

        Always empty when session navigation is off.
        """
        if not self.__useSessionNavigation:
            return LinkedHashMap()
        facetData = self.__result.getFacets()
        if facetData is None or not facetData.containsKey(key):
            return LinkedHashMap()
        return facetData.get(key).values()

    def getFacetDisplay(self):
        return self.__portal.facetDisplay

    def hasSelectedFacets(self):
        """Return True if more than the always-present filters are selected."""
        return (self.__selected is not None and len(self.__selected) > 1) and \
            not (self.__portal.query in self.__selected and len(self.__selected) == 2)

    def getSelectedFacets(self):
        return self.__selected

    def isPortalQueryFacet(self, fq):
        return fq == self.__portal.query

    def isSelected(self, fq):
        return fq in self.__selected

    def getSelectedFacetIds(self):
        """Return the MD5 hex digest of each selected filter query."""
        return [md5.new(fq).hexdigest() for fq in self.__selected]

    def getFileName(self, path):
        """Return the base name of `path` without its extension."""
        return os.path.splitext(os.path.basename(path))[0]

    def getFacetQuery(self, name, value):
        return '%s:"%s"' % (name, value)

    # Packaging support
    def getActiveManifestTitle(self):
        return self.__getActiveManifest().getTitle()

    def getActiveManifestId(self):
        return self.sessionState.get("package/active/id")

    def getSelectedItemsCount(self):
        return self.__getActiveManifest().size()

    def isSelectedForPackage(self, oid):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        return (result is not None)

    def getManifestItemTitle(self, oid, defaultValue):
        """Return the manifest title for `oid`, or `defaultValue` if not in the manifest."""
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        if result is None:
            return defaultValue
        return result.getTitle()

    def __getActiveManifest(self):
        """Return the session's active manifest, creating and storing one if absent."""
        activeManifest = self.sessionState.get("package/active")
        if not activeManifest:
            activeManifest = Manifest(None)
            activeManifest.setTitle("New package")
            activeManifest.setViewId(self.__portal.getName())
            self.sessionState.set("package/active", activeManifest)
        return activeManifest

    def isSelectableForPackage(self, oid):
        return oid != self.getActiveManifestId()

    def getSortFields(self):
        return self.__portal.sortFields

    def getSortField(self):
        return self.__sortField

    def getSortOrder(self):
        return self.__sortOrder

    # RESTful style URL support methods
    def getPageQuery(self, page):
        """Return the URL path for `page` under the currently selected facets."""
        prefix = ""
        if self.__fqParts:
            prefix = "/" + "/".join(self.__fqParts)
        suffix = ""
        if page > 1:
            suffix = "/page/%s" % page
        return prefix + suffix

    def getFacetQueryUri(self, name, value):
        return "%s/%s" % (name, value)

    def getFacetValue(self, facetValue):
        """Return the last '/'-separated segment of `facetValue`."""
        return facetValue.split("/")[-1]

    def getLimitQueryWith(self, fq):
        """Return the facet path with 'category/<fq>' appended."""
        limits = ArrayList(self.__fqParts)
        limits.add("category/" + fq)
        return "/".join(limits)

    def getFacetIndent(self, facetValue):
        """Return the number of '/'-separated segments in `facetValue`."""
        return len(facetValue.split("/"))

    def getLimitQueryWithout(self, fq):
        """Return the facet path with 'category/<fq>' removed ("" if none remain)."""
        limits = ArrayList(self.__fqParts)
        limits.remove("category/" + fq)
        if limits.isEmpty():
            return ""
        return "/".join(limits)

    def __parseUri(self, uri):
        """Parse a navigation path into (page, filter queries, facet path parts).

        The path is a sequence of 'page/<n>' and
        'category/<key>/<value>[/<value>...]' groups.
        """
        page = 1
        fq = []
        fqParts = []
        if uri != "":
            parts = uri.split("/")
            partType = None
            facetKey = None
            facetValues = None
            for part in parts:
                if partType == "page":
                    facetKey = None
                    page = int(part)
                elif partType == "category":
                    # The segment after 'category' is the facet key
                    partType = "category-value"
                    facetValues = None
                    facetKey = part
                elif partType == "category-value":
                    if facetValues is None:
                        facetValues = []
                    if part in ["page", "category"]:
                        # A keyword ends the current facet: flush it
                        partType = part
                        facetQuery = '%s:"%s"' % (facetKey, "/".join(facetValues))
                        fq.append(facetQuery)
                        fqParts.append("category/%s/%s" % (facetKey, "/".join(facetValues)))
                        facetKey = None
                        facetValues = None
                    else:
                        facetValues.append(URLDecoder.decode(part))
                else:
                    partType = part
            # Flush a facet that runs to the end of the path
            if partType == "category-value":
                facetQuery = '%s:"%s"' % (facetKey, "/".join(facetValues))
                fq.append(facetQuery)
                fqParts.append("category/%s/%s" % (facetKey, "/".join(facetValues)))
        return page, fq, fqParts
def __reportSearch(self):
    """Run the report named by the 'id' request parameter.

    Stores the Solr result for the display template and, when the 'format'
    request parameter is "csv", streams the matching records back as a CSV
    attachment. On failure self.errorMsg is set and the method returns
    early so the template can show the message.
    """
    self.reportId = self.request.getParameter("id")
    self.format = self.request.getParameter("format")
    self.report = self.reportManager.getReports().get(self.reportId)
    if self.report is None:
        # Unknown/missing report id: fail like the other error paths
        # instead of dereferencing None below.
        self.errorMsg = "Query failure. Report '%s' was not found." % self.reportId
        self.log.error("Reporting could not find report '%s'" % self.reportId)
        return
    self.reportQuery = self.report.getQueryAsString()
    self.log.debug("Report query: " + self.reportQuery)

    # Get a total number of records so the main query can ask for all rows.
    # Bare 'except' is deliberate throughout: the indexer raises Java
    # exceptions, which must be caught here as well.
    try:
        out = ByteArrayOutputStream()
        recnumreq = SearchRequest(self.reportQuery)
        recnumreq.setParam("rows", "0")
        self.indexer.search(recnumreq, out)
        recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
    except:
        self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
        self.log.error("Reporting threw an exception (report was %s): %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    # Setup the main query. The second filter is ADDED; a second setParam
    # would replace the first 'fq' rather than combine with it.
    req = SearchRequest(self.reportQuery)
    req.setParam("fq", 'item_type:"object"')
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", self.__rowsFoundSolr)
    try:
        # Now do the master search
        out = ByteArrayOutputStream()
        self.indexer.search(req, out)
        self.__reportResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__checkResults()
    except:
        self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
        self.log.error("Reporting threw an exception (report was %s): %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    # At this point the display template has enough to go with.
    # We just need to handle the CSV now
    if self.format != "csv":
        return

    # Setup the main query again - we need to requery to make sure we
    # return only the required fields. Rows are then filtered against the
    # IDs that passed the __checkResults check.
    req = SearchRequest(self.reportQuery)
    req.setParam("fq", 'item_type:"object"')
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", self.__rowsFoundSolr)
    req.setParam("csv.mv.separator", ";")

    # NOTE: filtering inside Solr with an 'id:(a OR b OR ...)' filter was
    # tried and abandoned because it does not work for long queries - do
    # not reintroduce it. The IDs are checked row by row instead.
    matchedIds = set([item.get("id") for item in self.getProcessedResultsList()])

    # Setup SOLR query with the required fields.
    # We must have an ID field and it must be the first field.
    self.fields = self.systemConfig.getArray("redbox-reports", "csv-output-fields")
    fieldNames = ["id"]
    if self.fields is not None:
        for field in self.fields:
            fieldNames.append(field.get("field-name"))
    req.setParam("fl", ",".join(fieldNames))

    out = ByteArrayOutputStream()
    try:
        self.indexer.search(req, out, self.format)
    except:
        # We can't get the result back from SOLR so fail back to the template display
        self.errorMsg = "Query failure. Failed to load the data - this issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
        self.log.error("Reporting threw an exception (report was %s); Error: %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    try:
        csvResponseString = String(out.toByteArray(), "utf-8")
        csvResponseLines = csvResponseString.split("\n")
    except:
        # We can't get the result back from SOLR so fail back to the template display
        self.errorMsg = "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
        self.log.error("Reporting threw an exception (report was %s); Error: %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    fileName = self.urlEncode(self.report.getLabel())
    self.log.debug("Generating CSV report with file name: " + fileName)
    self.response.setHeader("Content-Disposition", "attachment; filename=%s.csv" % fileName)

    sw = StringWriter()
    parser = CSVParser()
    writer = CSVWriter(sw)
    headerDone = False
    prevLine = ""
    badRowFlag = False

    for line in csvResponseLines:
        if badRowFlag:
            # The previous line failed to parse. Try once to repair it by
            # joining it with this line; if that fails too, write an error
            # row, then move on to the next row and keep going.
            joined = prevLine + line
            badRowFlag = False
            prevLine = ""
            try:
                self.log.debug("Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s" % joined)
                csvLine = parser.parseLine(joined)
                self.log.debug("Reporting - remedy appears to have worked. Line appears as: %s" % joined)
            except:
                # We tried to rescue the row but failed on the second run so give up
                writer.writeNext(["Failed to transfer record to CSV - check logs"])
                self.log.error("Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], joined))
                # Skip the rest of the loop body: there is no freshly
                # parsed csvLine to emit for this row.
                continue
        else:
            try:
                csvLine = parser.parseLine(line)
            except:
                # This can happen if there's a newline in the index data
                # so we raise the badRowFlag and see if we can join this
                # row to the next one to fix it
                self.log.debug("Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], line))
                badRowFlag = True
                prevLine = line
                continue

        if not headerDone:
            # Header row: swap field names for their display labels
            headerDone = True
            for idx, csvValue in enumerate(csvLine):
                csvLine[idx] = self.findDisplayLabel(csvValue)
        elif csvLine[0] not in matchedIds:
            # Not one of the records that passed __checkResults: ignore
            continue

        writer.writeNext(csvLine)

    # Now send off the CSV
    self.out = self.response.getOutputStream("text/csv")
    self.out.print(sw.toString())
    self.out.close()
def __activate__(self, context):
    """Admin-only API that queues objects for reharvest or reindex.

    Reads 'func' ("reharvest" or "reindex"), 'oid' and 'portalId' from the
    form data and writes a JSON object with 'status' and 'message' to the
    response. Any failure (including a non-admin caller) sets HTTP status
    500.
    """
    self.log = context["log"]
    response = context["response"]
    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    auth = context["page"].authentication
    sessionState = context["sessionState"]

    # Pessimistic default, overwritten by every handled branch below
    result = JsonObject()
    result.put("status", "error")
    result.put("message", "An unknown error has occurred")

    if auth.is_admin():
        services = context["Services"]
        formData = context["formData"]
        func = formData.get("func")
        oid = formData.get("oid")
        portalId = formData.get("portalId")
        portalManager = services.portalManager

        if func == "reharvest":
            if oid:
                self.log.debug("Reharvesting object '{}'", oid)
                self.sendMessage(oid)
                result.put("status", "ok")
                # The object id was previously never substituted in
                result.put("message", "Object '%s' queued for reharvest" % oid)
            elif portalId:
                self.log.debug("Reharvesting view '{}'", portalId)
                runningKey = "reharvest/running/" + portalId
                sessionState.set(runningKey, "true")
                try:
                    # TODO security filter - not necessary because this requires admin anyway?
                    portal = portalManager.get(portalId)
                    query = "*:*"
                    if portal.query != "":
                        query = portal.query
                    if portal.searchQuery != "":
                        if query == "*:*":
                            query = portal.searchQuery
                        else:
                            query = query + " AND " + portal.searchQuery

                    # query solr to get the objects to reharvest, one page
                    # of ids at a time
                    rows = 25
                    req = SearchRequest(query)
                    req.setParam("fq", 'item_type:"object"')
                    req.setParam("rows", str(rows))
                    req.setParam("fl", "id")
                    done = False
                    count = 0
                    while not done:
                        req.setParam("start", str(count))
                        out = ByteArrayOutputStream()
                        services.indexer.search(req, out)
                        solrResult = SolrResult(
                            ByteArrayInputStream(out.toByteArray()))
                        objectIds = HashSet(solrResult.getFieldList("id"))
                        if not objectIds.isEmpty():
                            for objectId in objectIds:
                                self.sendMessage(objectId)
                        # Always advance, so an empty page cannot stall the loop
                        count = count + rows
                        total = solrResult.getNumFound()
                        self.log.debug("Queued {} of {}...",
                                       min(count, total), total)
                        done = (count >= total)
                finally:
                    # Clear the 'running' flag even if the search fails
                    sessionState.remove(runningKey)
                result.put("status", "ok")
                result.put("message",
                           "Objects in '%s' queued for reharvest" % portalId)
            else:
                response.setStatus(500)
                result.put("message",
                           "No object or view specified for reharvest")
        elif func == "reindex":
            if oid:
                self.log.debug("Reindexing object '{}'", oid)
                self.sendMessage(oid)
                result.put("status", "ok")
                result.put("message", "Object '%s' queued for reindex" % oid)
            else:
                response.setStatus(500)
                result.put("message", "No object specified to reindex")
        else:
            response.setStatus(500)
            result.put("message", "Unknown action '%s'" % func)
    else:
        response.setStatus(500)
        result.put("message", "Only administrative users can access this API")

    writer.println(result.toString())
    writer.close()
def __reportSearch(self):
    """Run the report named by the 'id' request parameter.

    Stores the Solr result for the display template and, when the 'format'
    request parameter is "csv", streams the matching records back as a CSV
    attachment. On failure self.errorMsg is set and the method returns
    early so the template can show the message.
    """
    self.reportId = self.request.getParameter("id")
    self.format = self.request.getParameter("format")
    self.report = self.reportManager.getReports().get(self.reportId)
    if self.report is None:
        # Unknown/missing report id: fail like the other error paths
        # instead of dereferencing None below.
        self.errorMsg = "Query failure. Report '%s' was not found." % self.reportId
        self.log.error("Reporting could not find report '%s'" % self.reportId)
        return
    self.reportQuery = self.report.getQueryAsString()
    self.log.debug("Report query: " + self.reportQuery)

    # Get a total number of records so the main query can ask for all rows.
    # Bare 'except' is deliberate throughout: the indexer raises Java
    # exceptions, which must be caught here as well.
    try:
        out = ByteArrayOutputStream()
        recnumreq = SearchRequest(self.reportQuery)
        recnumreq.setParam("rows", "0")
        self.indexer.search(recnumreq, out)
        recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
    except:
        self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (
            sys.exc_info()[0], sys.exc_info()[1])
        self.log.error(
            "Reporting threw an exception (report was %s): %s - %s" %
            (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    # Setup the main query. The second filter is ADDED; a second setParam
    # would replace the first 'fq' rather than combine with it.
    req = SearchRequest(self.reportQuery)
    req.setParam("fq", 'item_type:"object"')
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", self.__rowsFoundSolr)
    try:
        # Now do the master search
        out = ByteArrayOutputStream()
        self.indexer.search(req, out)
        self.__reportResult = SolrResult(
            ByteArrayInputStream(out.toByteArray()))
        self.__checkResults()
    except:
        self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (
            sys.exc_info()[0], sys.exc_info()[1])
        self.log.error(
            "Reporting threw an exception (report was %s): %s - %s" %
            (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    # At this point the display template has enough to go with.
    # We just need to handle the CSV now
    if self.format != "csv":
        return

    # Setup the main query again - we need to requery to make sure we
    # return only the required fields. Rows are then filtered against the
    # IDs that passed the __checkResults check.
    req = SearchRequest(self.reportQuery)
    req.setParam("fq", 'item_type:"object"')
    req.addParam("fq", 'workflow_id:"dataset"')
    req.setParam("rows", self.__rowsFoundSolr)
    req.setParam("csv.mv.separator", ";")

    # NOTE: filtering inside Solr with an 'id:(a OR b OR ...)' filter was
    # tried and abandoned because it does not work for long queries - do
    # not reintroduce it. The IDs are checked row by row instead.
    matchedIds = set(
        [item.get("id") for item in self.getProcessedResultsList()])

    # Setup SOLR query with the required fields.
    # We must have an ID field and it must be the first field.
    self.fields = self.systemConfig.getArray("redbox-reports",
                                             "csv-output-fields")
    fieldNames = ["id"]
    if self.fields is not None:
        for field in self.fields:
            fieldNames.append(field.get("field-name"))
    req.setParam("fl", ",".join(fieldNames))

    out = ByteArrayOutputStream()
    try:
        self.indexer.search(req, out, self.format)
    except:
        # We can't get the result back from SOLR so fail back to the template display
        self.errorMsg = "Query failure. Failed to load the data - this issue has been logged (%s - %s)." % (
            sys.exc_info()[0], sys.exc_info()[1])
        self.log.error(
            "Reporting threw an exception (report was %s); Error: %s - %s" %
            (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    try:
        csvResponseString = String(out.toByteArray(), "utf-8")
        csvResponseLines = csvResponseString.split("\n")
    except:
        # We can't get the result back from SOLR so fail back to the template display
        self.errorMsg = "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s)." % (
            sys.exc_info()[0], sys.exc_info()[1])
        self.log.error(
            "Reporting threw an exception (report was %s); Error: %s - %s" %
            (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
        return

    fileName = self.urlEncode(self.report.getLabel())
    self.log.debug("Generating CSV report with file name: " + fileName)
    self.response.setHeader("Content-Disposition",
                            "attachment; filename=%s.csv" % fileName)

    sw = StringWriter()
    parser = CSVParser()
    writer = CSVWriter(sw)
    headerDone = False
    prevLine = ""
    badRowFlag = False

    for line in csvResponseLines:
        if badRowFlag:
            # The previous line failed to parse. Try once to repair it by
            # joining it with this line; if that fails too, write an error
            # row, then move on to the next row and keep going.
            joined = prevLine + line
            badRowFlag = False
            prevLine = ""
            try:
                self.log.debug(
                    "Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s"
                    % joined)
                csvLine = parser.parseLine(joined)
                self.log.debug(
                    "Reporting - remedy appears to have worked. Line appears as: %s"
                    % joined)
            except:
                # We tried to rescue the row but failed on the second run so give up
                writer.writeNext(
                    ["Failed to transfer record to CSV - check logs"])
                self.log.error(
                    "Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s"
                    % (self.report.getLabel(), sys.exc_info()[0],
                       sys.exc_info()[1], joined))
                # Skip the rest of the loop body: there is no freshly
                # parsed csvLine to emit for this row.
                continue
        else:
            try:
                csvLine = parser.parseLine(line)
            except:
                # This can happen if there's a newline in the index data
                # so we raise the badRowFlag and see if we can join this
                # row to the next one to fix it
                self.log.debug(
                    "Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s"
                    % (self.report.getLabel(), sys.exc_info()[0],
                       sys.exc_info()[1], line))
                badRowFlag = True
                prevLine = line
                continue

        if not headerDone:
            # Header row: swap field names for their display labels
            headerDone = True
            for idx, csvValue in enumerate(csvLine):
                csvLine[idx] = self.findDisplayLabel(csvValue)
        elif csvLine[0] not in matchedIds:
            # Not one of the records that passed __checkResults: ignore
            continue

        writer.writeNext(csvLine)

    # Now send off the CSV
    self.out = self.response.getOutputStream("text/csv")
    self.out.print(sw.toString())
    self.out.close()
class OaiData:
    """Backing script for the OAI-PMH page.

    On activation the incoming request is validated through ``OaiPmhVerb``.
    For the record-listing verbs (GetRecord, ListIdentifiers, ListRecords)
    the Solr index is searched and, when the result spans more than one
    page, every following page is pre-computed and stored in the tokens
    database under a chain of resumption tokens.
    """

    # Prefix of the identifiers generated when a record has no explicit
    # 'oai_identifier' value in the index.
    OAI_ID_PREFIX = "oai:fascinator.usq.edu.au:"

    def __init__(self):
        # Created lazily on first activation because it needs the context.
        self.tokensDB = None

    def __activate__(self, context):
        """Validate the request and prepare the search result for it.

        context -- the scripting context; the entries read here are
                   "Services", "log", "sessionState", "portalDir", "page",
                   "formData" and "request".
        """
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.lastPage = False

        # Check if the OAI request has an overriding portal ('set') in the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken is not None:
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB,
                                    self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument",
                                    "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        # data (like a search result) do so now. Everything else can be
        # handled in the templates.
        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers",
                                   "ListRecords"]:
            # Find the metadata prefix requested
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            if self.__metadataPrefix is None:
                # NOTE(review): relies on OaiPmhVerb having reported an error
                # whenever neither a prefix nor a known token was supplied;
                # confirm, otherwise the token can be None here.
                self.__metadataPrefix = \
                    self.__currentToken.getMetadataPrefix()

            if resumptionToken is None:
                self.__buildResumptionTokenSets()
            else:
                self.__result = SolrResult(
                    self.__currentToken.getResultJson())

            # Only list records if the metadata format is enabled in this view
            if not self.isInView(self.__metadataPrefix):
                self.__result = None

    # Get from velocity context
    def vc(self, index):
        """Return the context entry stored under ``index``.

        A None entry is logged as an error and None is returned.
        """
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        self.log.error("ERROR: Requested context entry '" + index +
                       "' doesn't exist")
        return None

    def isInView(self, format, view=None):
        """Tell whether metadata format ``format`` is enabled for ``view``.

        format -- metadata prefix; None or "" is never in view.
        view   -- portal name; defaults to the portal of this request.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True

        # Otherwise it has to be listed for this view explicitly
        if view in formatConfig.getStringList(["enabledViews"]):
            return True

        # Rejection
        return False

    def getID(self, item):
        """Return the OAI identifier of ``item``.

        The indexed 'oai_identifier' is used when present; otherwise the
        default prefix is put in front of the item's 'id'.
        """
        identifier = item.getFirst("oai_identifier")
        if identifier is None or identifier == "":
            return self.OAI_ID_PREFIX + item.getFirst("id")
        return identifier

    def isDeleted(self, item):
        """Return the truthiness of the item's 'oai_deleted' value."""
        # NOTE(review): any non-empty value, including the text "false", is
        # truthy here - confirm the index only populates this field for
        # records that really are deleted.
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Return the item's 'oai_set', falling back to the portal name."""
        oaiSet = item.getFirst("oai_set")
        if oaiSet is None or oaiSet == "":
            return self.__portalName
        return oaiSet

    def getVerb(self):
        """Return the verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error recorded on the parsed request (None if none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp with a 'Z' suffix.

        The 'Z' designator means UTC, so the time has to be taken from
        gmtime(); formatting local time would be wrong on any server that
        does not run in UTC.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the OaiPmhVerb built for this request."""
        return self.__request

    def getResult(self):
        """Return the search result for this page, or None."""
        return self.__result

    def getElement(self, elementName, values):
        """Render every value as ``<elementName>value</elementName>``.

        Values are inserted as given (no XML escaping is applied here).
        Returns "" when ``values`` is None or empty.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, q):
        """Backslash-escape query-parser special characters in ``q``.

        The identifier arrives straight from the request and is placed
        unquoted after a field name, so whitespace is escaped as well.
        """
        special = "\\+-!():^[]\"{}~*?|&;/"
        escaped = []
        for char in q:
            if char in special or char.isspace():
                escaped.append("\\")
            escaped.append(char)
        return "".join(escaped)

    def __newTokenId(self):
        """Return a random hexadecimal id for a resumption token."""
        return "%016x" % random.getrandbits(128)

    def __buildResumptionTokenSets(self):
        """Run the search for this request and pre-compute later pages.

        The first page ends up in the current result. When more records
        exist than fit on one page, each following page is searched and
        stored in the tokens database under its own resumption token; the
        last stored page carries an empty next-token.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        query = "*:*"
        if identifier is not None and identifier != "":
            prefix = self.OAI_ID_PREFIX
            if identifier.startswith(prefix):
                # A default identifier: search on the object id behind it
                query = "id:" + self.__escapeQuery(identifier[len(prefix):])
            else:
                # Or a custom OAI ID
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Optional date range; an open end is expressed as '*'
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        fromStr = "*"
        untilStr = "*"
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
        if untilDate is not None:
            untilStr = untilDate.isoformat() + "Z"
            self.log.debug("Until Date: '{}'", untilStr)
        if fromDate is not None or untilDate is not None:
            queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)

        # First page
        req.setParam("start", "0")
        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        self.log.debug("Total found:" + str(totalFound))

        # The page-size guard keeps the loop below from spinning forever
        if recordsPerPage > 0 and totalFound > recordsPerPage:
            random.seed()
            resumptionToken = self.__newTokenId()
            # The first page is already held in the current result, so the
            # token handed out with it only has to point at the second page.
            self.__currentToken = ResumptionToken(
                None, self.__metadataPrefix, resumptionToken, None)

            startRow = recordsPerPage
            while startRow < totalFound:
                self.log.debug("Current Resumption Token: " +
                               resumptionToken)
                req.setParam("start", str(startRow))
                out = ByteArrayOutputStream()
                self.services.indexer.search(req, out)
                result = SolrResult(
                    ByteArrayInputStream(out.toByteArray()))

                startRow = startRow + recordsPerPage
                # '>=' (not '>'): when the total is an exact multiple of the
                # page size, this page is the last one and must end the
                # chain instead of pointing at an empty extra page.
                if startRow >= totalFound:
                    nextResumptionToken = ""
                else:
                    nextResumptionToken = self.__newTokenId()

                self.tokensDB.storeToken(ResumptionToken(
                    resumptionToken, self.__metadataPrefix,
                    nextResumptionToken, result.toString()))
                self.log.debug("Resumption Token: " + resumptionToken +
                               " next resumption token:" +
                               nextResumptionToken)
                resumptionToken = nextResumptionToken

    def getToken(self):
        """Return the current resumption token, or None.

        None is returned when the metadata format is not enabled in this
        view or when ``lastPage`` has been set.
        """
        if self.isInView(self.__metadataPrefix) and not self.lastPage:
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats (read once, then cached)."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return ``string`` escaped with StringEscapeUtils.escapeXml."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of object ``oid``.

        None is returned when storage raises a StorageException for either
        the object or the payload.
        """
        # First get the Object from storage
        try:
            storedObject = self.services.getStorage().getObject(oid)
        except StorageException:
            return None

        # Check whether the payload exists
        try:
            return storedObject.getPayload(metadataFileName)
        except StorageException:
            return None
def __activate__(self, context):
    """Administrative API that queues objects for reharvest or reindex.

    Reads 'func', 'oid' and 'portalId' from the form data and writes a JSON
    object with 'status' and 'message' to the response:

    * func=reharvest with an oid      -- reharvest that single object
    * func=reharvest with a portalId  -- reharvest every object matched by
                                         the view's query, 25 ids per search
    * func=reindex with an oid        -- reindex that object and commit

    Anything else, and any caller that is not an administrator, gets an
    HTTP 500 status with an explanatory message.
    """
    response = context["response"]
    log = context["log"]
    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    auth = context["page"].authentication
    sessionState = context["sessionState"]

    # Pessimistic default; every successful branch overwrites both fields.
    result = JsonObject()
    result.put("status", "error")
    result.put("message", "An unknown error has occurred")

    if auth.is_admin():
        services = context["Services"]
        formData = context["formData"]
        func = formData.get("func")
        oid = formData.get("oid")
        portalId = formData.get("portalId")
        portalManager = services.portalManager

        if func == "reharvest":
            if oid:
                # One object
                log.info(" * Reharvesting object '{}'", oid)
                portalManager.reharvest(oid)
                result.put("status", "ok")
                result.put("message",
                           "Object '%s' queued for reharvest" % oid)
            elif portalId:
                # The whole portal
                log.info(" * Reharvesting view '{}'", portalId)
                runningKey = "reharvest/running/" + portalId
                sessionState.set(runningKey, "true")
                try:
                    # TODO security filter - not necessary because this
                    # requires admin anyway?
                    portal = portalManager.get(portalId)
                    # Truthiness treats a missing (None) query like an
                    # empty one.
                    query = portal.query or "*:*"
                    if portal.searchQuery:
                        if query == "*:*":
                            query = portal.searchQuery
                        else:
                            # Parenthesised so that an OR inside either part
                            # cannot capture the other part.
                            query = "(%s) AND (%s)" % (query,
                                                       portal.searchQuery)

                    # query solr to get the objects to reharvest
                    rows = 25
                    req = SearchRequest(query)
                    req.setParam("fq", 'item_type:"object"')
                    req.setParam("rows", str(rows))
                    req.setParam("fl", "id")
                    done = False
                    count = 0
                    while not done:
                        req.setParam("start", str(count))
                        out = ByteArrayOutputStream()
                        services.indexer.search(req, out)
                        page = SolrResult(
                            ByteArrayInputStream(out.toByteArray()))
                        objectIds = HashSet(page.getFieldList("id"))
                        if not objectIds.isEmpty():
                            portalManager.reharvest(objectIds)
                        count = count + rows
                        total = page.getNumFound()
                        log.info(" * Queued {} of {}...",
                                 (min(count, total), total))
                        done = (count >= total)
                finally:
                    # Clear the flag even when a search or queueing call
                    # fails, so the view is not reported as running forever.
                    sessionState.remove(runningKey)
                result.put("status", "ok")
                result.put("message",
                           "Objects in '%s' queued for reharvest" % portalId)
            else:
                response.setStatus(500)
                result.put("message",
                           "No object or view specified for reharvest")
        elif func == "reindex":
            if oid:
                log.info(" * Reindexing object '{}'", oid)
                services.indexer.index(oid)
                services.indexer.commit()
                result.put("status", "ok")
                result.put("message", "Object '%s' queued for reindex" % oid)
            else:
                response.setStatus(500)
                result.put("message", "No object specified to reindex")
        else:
            response.setStatus(500)
            result.put("message", "Unknown action '%s'" % func)
    else:
        response.setStatus(500)
        result.put("message", "Only administrative users can access this API")

    writer.println(result.toString())
    writer.close()
class HomeData:
    """Backing script for the portal home page.

    On activation four index searches are run: the ten most recently
    modified items of the last month, the ten latest items owned by the
    current user, the ten latest items matching the user in
    'workflow_security', and a zero-row search used only for the item count.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the context, drop the session 'fq' and run the searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")

        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None

        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry stored under ``index``.

        A None entry is reported on stdout and None is returned.
        """
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        # Single parenthesised argument: prints the same text whether print
        # is a statement or a function.
        print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def __runSearch(self, indexer, query, filters, rows, sort=None):
        """Search the index and return the parsed SolrResult.

        query   -- main query string
        filters -- non-empty list of 'fq' values, applied in the given order
        rows    -- row limit, as a string
        sort    -- optional sort specification
        """
        req = SearchRequest(query)
        req.setParam("fq", filters[0])
        for fq in filters[1:]:
            req.addParam("fq", fq)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __search(self):
        """Run the four home page searches and keep their results."""
        indexer = self.services.getIndexer()
        # Look the portal up once; both queries come from the same object.
        portal = self.services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + \
            '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + \
            security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = auth.is_admin()

        # Filters shared by every search, in the order they are sent
        baseFilters = ['item_type:"object"']
        if portalQuery:
            baseFilters.append(portalQuery)
        if portalSearchQuery:
            baseFilters.append(portalSearchQuery)

        # Administrators are not restricted by the security query
        securityFilters = []
        if not isAdmin:
            securityFilters.append(security_query)

        listFilters = baseFilters + securityFilters
        newestFirst = "last_modified desc, f_dc_title asc"

        self.__latest = self.__runSearch(
            indexer, "last_modified:[NOW-1MONTH TO *]", listFilters,
            "10", newestFirst)
        self.__mine = self.__runSearch(
            indexer, owner_query, listFilters, "10", newestFirst)
        self.__workflows = self.__runSearch(
            indexer, 'workflow_security:"' + current_user + '"',
            listFilters, "10", newestFirst)

        # Count only: no rows and no sort.
        # NOTE(review): the empty 'fq' value is carried over unchanged from
        # the previous code - confirm whether it is still needed.
        countFilters = baseFilters + [""] + securityFilters
        self.__result = self.__runSearch(indexer, "*:*", countFilters, "0")

        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        """Return the results of the 'recently modified' search."""
        return self.__latest.getResults()

    def getMine(self):
        """Return the results of the 'owned by the current user' search."""
        return self.__mine.getResults()

    def getWorkflows(self):
        """Return the results of the 'workflow_security' search."""
        return self.__workflows.getResults()

    def getItemCount(self):
        """Return the number of items found by the count search."""
        return self.__result.getNumFound()
class HarvestreportData:
    """Backing script for the report page of a single harvest.

    The harvest id is the last path segment of the request URI. For
    administrators the "eventLog" index is searched for the records touched
    by that harvest (all, new, modified, unmodified) and for the number of
    new records of the same repository up to the end of the harvest. Every
    count getter returns 0 when its search did not run or failed.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the context, work out the harvest id and run the searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.request = self.vc("request")
        self.log = context["log"]

        # Start from a clean slate on every activation so the getters never
        # see a missing attribute or a result left over from another call.
        self.__harvestedRecords = None
        self.__latest = None
        self.__modified = None
        self.__unmodified = None
        self.__allRecords = None

        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        self.__harvestId = os.path.basename(uri)
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry stored under ``index``.

        A None entry is reported on stdout and None is returned.
        """
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        # Single parenthesised argument: prints the same text whether print
        # is a statement or a function.
        print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def __quote(self, value):
        """Return ``value`` as a double-quoted query phrase.

        Backslashes and double quotes are escaped so that a value taken
        from the URL or the index cannot terminate the phrase early.
        """
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return '"' + escaped + '"'

    def __eventLogSearch(self, indexer, query, filters):
        """Search the "eventLog" index and return the parsed SolrResult.

        filters -- non-empty list of 'fq' values; the first is set and the
                   rest are added, so none of them replaces another.
        """
        req = SearchRequest(query)
        req.setParam("fq", filters[0])
        for fq in filters[1:]:
            req.addParam("fq", fq)
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __firstOrNone(self, values):
        """Return the first entry of a Java list, or None when it is empty."""
        if values.size() > 0:
            return values.get(0)
        return None

    def __count(self, result):
        """Return the hit count of ``result``, or 0 when there is none."""
        if result is None:
            return 0
        return result.getNumFound()

    def __search(self):
        """Run the event log searches for this harvest (admins only)."""
        indexer = self.services.getIndexer()

        # Security prep work
        isAdmin = self.vc("page").authentication.is_admin()
        if not isAdmin:
            self.log.error("Harvest report requested by a non-admin user")
            return None

        harvestQuery = 'harvestId:' + self.__quote(self.__harvestId)

        # All records touched by this harvest. A failure here is logged and
        # leaves the count at 0, as the first search was already tolerant of
        # errors. The bare except is deliberate: under Jython it also covers
        # Java exceptions thrown by the indexer.
        try:
            self.__harvestedRecords = self.__eventLogSearch(
                indexer, harvestQuery, ['eventType:modify'])
        except:
            self.log.error("Harvest report search failed: %s"
                           % traceback.format_exc())

        # New and modified records: both filters have to apply together
        self.__latest = self.__eventLogSearch(
            indexer, harvestQuery, ['eventType:modify', 'isNew:true'])
        self.__modified = self.__eventLogSearch(
            indexer, harvestQuery, ['eventType:modify', 'isModified:true'])
        self.__unmodified = self.__eventLogSearch(
            indexer,
            harvestQuery + ' AND eventType:"modify" AND isModified:false',
            ['isNew:false'])

        # The harvestEnd event tells us when, and for which repository,
        # the harvest was run.
        endItem = self.__eventLogSearch(
            indexer, harvestQuery, ['eventType:harvestEnd'])
        date = self.__firstOrNone(endItem.getFieldList('eventTime'))
        repoType = self.__firstOrNone(endItem.getFieldList('repository_type'))
        repoName = self.__firstOrNone(endItem.getFieldList('repository_name'))

        if date is None or repoType is None or repoName is None:
            # Without a complete harvestEnd event the total cannot be
            # queried; leave it unset so getTotalcount() reports 0.
            self.log.error("No complete harvestEnd event found for harvest "
                           "'%s'" % self.__harvestId)
            return None

        self.__allRecords = self.__eventLogSearch(
            indexer,
            'repository_type:' + self.__quote(repoType) +
            ' AND repository_name:' + self.__quote(repoName) +
            ' AND eventType:"modify" AND isNew:true',
            ["eventTime:[* TO " + date + "]"])

    def getItemcount(self):
        """Return the number of records touched by this harvest."""
        return self.__count(self.__harvestedRecords)

    def getNewcount(self):
        """Return the number of records flagged isNew:true."""
        return self.__count(self.__latest)

    def getModifiedcount(self):
        """Return the number of records flagged isModified:true."""
        return self.__count(self.__modified)

    def getUnmodifiedcount(self):
        """Return the number of records neither new nor modified."""
        return self.__count(self.__unmodified)

    def getTotalcount(self):
        """Return the number of new records up to the end of this harvest."""
        return self.__count(self.__allRecords)