class OaiData:
    """Backing script for a portal's OAI-PMH endpoint.

    On activation it validates the incoming request (through OaiPmhVerb),
    resolves the portal ('set') and metadata prefix, and, for the verbs that
    need records (GetRecord, ListIdentifiers, ListRecords), runs an index
    search and maintains the resumption token. The remaining accessors are
    small helpers used to render the response.
    """

    def __init__(self):
        # Created on first activation because it needs the context
        self.tokensDB = None

    def __activate__(self, context):
        """Process one OAI-PMH request described by ``context``."""
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]
        self.__result = None

        # Check if the OAI request has an overriding portal ('set')
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken is not None:
            # This could still be null
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument",
                                    "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        # data (like a search result) do so now. Everything else can be
        # handled in the templates.
        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            # Find the metadata prefix requested
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            if self.__metadataPrefix is None:
                if self.__currentToken is not None:
                    self.__metadataPrefix = self.__currentToken.getMetadataPrefix()
                else:
                    # Neither an explicit prefix nor a usable token: report
                    # it as a request error instead of failing on None.
                    self.__metadataPrefix = ""
                    self.__request.setError(
                        "badArgument", "Metadata prefix is required")

            # Only list records if the metadata format is enabled in this view
            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; log an error and return None
        when the entry is None."""
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def isInView(self, format, view = None):
        """Return True when metadata format ``format`` is enabled for ``view``.

        ``view`` defaults to the portal resolved during activation.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True

        # Check if it is visible in this view
        allowedViews = formatConfig.getStringList(["enabledViews"])
        return allowedViews is not None and view in allowedViews

    def getID(self, item):
        """Return the item's indexed 'oai_identifier', or the default
        identifier built from its 'id' field when none is indexed."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator.usq.edu.au:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        """Return the truthiness of the item's 'oai_deleted' field."""
        # NOTE(review): a non-empty string such as "false" is truthy here;
        # confirm what the indexer actually stores in this field.
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Return the item's 'oai_set', falling back to the portal name."""
        setName = item.getFirst("oai_set")
        # Fallback to the portal name
        if setName is None or setName == "":
            return self.__portalName
        # Use the required set
        return setName

    def getVerb(self):
        return self.getRequest().getVerb()

    def getError(self):
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp string.

        The format ends in a literal 'Z', so the time must be taken from
        gmtime(); strftime() alone would format local time.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        return self.__request

    def getResult(self):
        return self.__result

    def getElement(self, elementName, values):
        """Return one ``<elementName>value</elementName>`` element per value,
        concatenated; an empty string when ``values`` is empty or None.

        Values are inserted as-is (no XML escaping is applied here).
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, q):
        """Backslash-escape query syntax characters and whitespace in ``q``
        so it is matched as a literal term.

        The character set follows SolrJ's ClientUtils.escapeQueryChars.
        """
        special = "\\+-!():^[]\"{}~*?|&;/"
        pieces = []
        for c in q:
            if c in special or c.isspace():
                pieces.append("\\")
            pieces.append(c)
        return "".join(pieces)

    def __search(self):
        """Run the index search for the current request and update the
        resumption token bookkeeping."""
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""
        query = "*:*"
        if hasIdentifier:
            defaultPrefix = "oai:fascinator.usq.edu.au:"
            if identifier.startswith(defaultPrefix):
                # A default TF2 OID
                idString = identifier.replace(defaultPrefix, "")
                query = "id:" + self.__escapeQuery(idString)
            else:
                # Or a custom OAI ID
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Date data... if supplied
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilStr)
                queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            else:
                queryStr = "last_modified:[%s TO *]" % (fromStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)
        elif untilDate is not None:
            untilStr = untilDate.isoformat() + "Z"
            self.log.debug("Until Date: '{}'", untilDate.isoformat())
            queryStr = "last_modified:[* TO %s]" % (untilStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)

        # Continue from an existing resumption token...
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # ... or start a new resumption token
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix,
                                       self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist",
                                        "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch",
                                        "No records match this request")
        # We need to store this for NEW tokens
        elif self.__currentToken is None:
            # Assuming there are enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        # Check if we need to remove the resumption token
        else:
            if self.__result.getResults().size() < recordsPerPage:
                self.tokensDB.removeToken(self.__currentToken)

        # Store/update the resumption token
        if newToken is not None:
            if self.__currentToken is None:
                # Brand new token
                self.tokensDB.storeToken(newToken)
            else:
                # Or update an old token
                self.tokensDB.updateToken(newToken)

        # None here means the response carries no resumption token
        self.__currentToken = newToken

    def getToken(self):
        """Return the current resumption token, or None when the requested
        metadata format is not enabled in this view."""
        if self.isInView(self.__metadataPrefix):
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats, loading them on first use."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of stored object ``oid``, or
        None when either lookup raises StorageException."""
        # First get the Object from storage
        try:
            storedObject = self.services.getStorage().getObject(oid)
        except StorageException:
            return None

        # Check whether the payload exists
        try:
            return storedObject.getPayload(metadataFileName)
        except StorageException:
            return None
class HomeData:
    """Backing script for the portal home page.

    Activation runs four index searches (recently modified items, items
    owned by the current user, items in the user's workflows, and an overall
    count) and keeps the results for the getters below.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context
        # Clear any filter query held in the session before searching
        self.vc("sessionState").remove("fq")

        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None

        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; print an error and return
        None when the entry is None."""
        entry = self.velocityContext[index]
        if entry is None:
            print("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None
        return entry

    def __runSearch(self, indexer, query, leadingFilters, rows, sort, trailingFilters):
        """Build a request for ``query``, execute it and return the raw
        output stream.

        Parameters are applied in a fixed order: the item_type filter,
        ``leadingFilters``, rows, the optional sort, then ``trailingFilters``.
        """
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for filterQuery in leadingFilters:
            req.addParam("fq", filterQuery)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        for filterQuery in trailingFilters:
            req.addParam("fq", filterQuery)

        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return out

    def __search(self):
        """Run the four home page searches and store their results."""
        indexer = Services.getIndexer()
        portalQuery = Services.getPortalManager().get(self.vc("portalId")).getQuery()
        portalSearchQuery = Services.getPortalManager().get(self.vc("portalId")).getSearchQuery()
        # Only non-empty portal queries become filters
        portalFilters = [fq for fq in (portalQuery, portalSearchQuery) if fq]

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = ("(" + security_filter + ") OR ("
                          + security_exceptions + ") OR ("
                          + owner_query + ")")
        # Admins are not restricted by the security filter
        if auth.is_admin():
            securityFilters = []
        else:
            securityFilters = [security_query]

        newestFirst = "last_modified desc, f_dc_title asc"

        # Items modified within the last month
        out = self.__runSearch(indexer, "last_modified:[NOW-1MONTH TO *]",
                               portalFilters, "10", newestFirst, securityFilters)
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Items owned by the current user
        out = self.__runSearch(indexer, owner_query,
                               portalFilters, "10", newestFirst, securityFilters)
        self.__mine = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Items whose workflow security names the current user
        out = self.__runSearch(indexer, 'workflow_security:"' + current_user + '"',
                               portalFilters, "10", newestFirst, securityFilters)
        self.__workflows = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Count only: zero rows, no sort, plus the blank filter the
        # original request also carried
        out = self.__runSearch(indexer, "*:*",
                               portalFilters + [""], "0", None, securityFilters)
        self.vc("sessionState").set("fq", 'item_type:"object"')
        #sessionState.set("query", portalQuery.replace("\"", "'"))
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    def getLatest(self):
        return self.__latest.getResults()

    def getMine(self):
        return self.__mine.getResults()

    def getWorkflows(self):
        return self.__workflows.getResults()

    def getItemCount(self):
        return self.__result.getNumFound()
def __search(self):
    """Build and run the main search for the page, then set up paging.

    Reads the user's query from the form data, widens it with the ids of
    objects whose annotations match, applies the portal's search query,
    facet filters (session based or URI based) and, for non-admin users,
    the security filter. Stores the result in ``self.__result`` and the
    pagination in ``self.__paging``.
    """
    recordsPerPage = self.__portal.recordsPerPage
    # Decode as UTF-8 like every other decode in this method; the
    # one-argument overload depends on the platform default charset.
    uri = URLDecoder.decode(self.request.getAttribute("RequestURI"), "UTF-8")
    pagePath = self.__portal.getName() + "/" + self.pageName

    # The query always comes from the form, so it is always escaped
    query = self.formData.get("query")
    if query is None or query == "":
        query = "*:*"

    if query == "*:*":
        self.__query = ""
    else:
        self.__query = query
        query = "%s:%s" % (self.__searchField, self.__escapeQuery(query))
    self.sessionState.set("query", self.__query)

    # find objects with annotations matching the query
    if query != "*:*":
        annoReq = SearchRequest(self.__escapeQuery(self.__query))
        annoReq.setParam("facet", "false")
        annoReq.setParam("rows", str(99999))
        annoReq.setParam("sort", "dateCreated asc")
        annoReq.setParam("start", str(0))
        anotarOut = ByteArrayOutputStream()
        self.services.indexer.annotateSearch(annoReq, anotarOut)
        annotations = SolrResult(ByteArrayInputStream(anotarOut.toByteArray())).getResults()

        ids = HashSet()
        for annoDoc in annotations:
            annotatesUri = annoDoc.get("annotatesUri")
            print("Found annotation for %s" % annotatesUri)
            # A missing uri cannot be joined into the query string
            if annotatesUri is not None:
                ids.add(annotatesUri)

        # add annotation ids to query; grouped so that a later
        # " AND <portal search query>" applies to the whole alternative,
        # and skipped when nothing matched instead of emitting id:("")
        if not ids.isEmpty():
            query = '(' + query + ' OR id:("' + '" OR "'.join(ids) + '"))'

    portalSearchQuery = self.__portal.searchQuery
    if not portalSearchQuery:
        # No portal restriction (empty or None): remember the user query
        portalSearchQuery = query
    elif query != "*:*":
        query += " AND " + portalSearchQuery
    else:
        query = portalSearchQuery

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", self.__portal.facetFieldList)
    req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
    req.setParam("facet.limit", str(self.__portal.facetCount))
    req.setParam("sort", self.__sortBy)

    # setup facets
    if self.__useSessionNavigation:
        action = self.formData.get("verb")
        value = self.formData.get("value")
        fq = self.sessionState.get("fq")
        if fq is not None:
            self.__pageNum = 1
            req.setParam("fq", fq)
        if action == "add_fq":
            self.__pageNum = 1
            req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "remove_fq":
            self.__pageNum = 1
            req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "clear_fq":
            self.__pageNum = 1
            req.removeParam("fq")
        elif action == "select-page":
            self.__pageNum = int(value)
    else:
        # Page number and filters are encoded in the URI after the page path
        navUri = uri[len(pagePath):]
        self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
        savedfq = self.sessionState.get("savedfq")
        limits = []
        if savedfq:
            limits.extend(savedfq)
        if fq:
            limits.extend(fq)
        self.sessionState.set("savedfq", limits)
        # Tolerate a missing filter list from the URI parser
        for q in fq or []:
            req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

    portalQuery = self.__portal.query
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", 'item_type:"object"')

    selectedFilters = req.getParams("fq")
    if selectedFilters:
        self.__selected = ArrayList(selectedFilters)

    if self.__useSessionNavigation:
        self.sessionState.set("fq", self.__selected)
        self.sessionState.set("searchQuery", portalSearchQuery)
        self.sessionState.set("pageNum", self.__pageNum)

    # The security filter is added only after 'fq' has been stored in the
    # session above, so it is never persisted with the user's facets
    if not self.page.authentication.is_admin():
        current_user = self.page.authentication.get_username()
        security_roles = self.page.authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        req.addParam("fq", security_query)

    req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

    print(" * search.py: %s %s" % (req.toString(), self.__pageNum))

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
    self.__paging = Pagination(self.__pageNum,
                               int(self.__result.getNumFound()),
                               self.__portal.recordsPerPage)