def __isIndexed(self, oid):
    """Tell whether the index holds an 'object' record with the given id.

    Args:
        oid: identifier matched against the ``id`` field.

    Returns:
        True when the search reports a non-zero number of hits.
    """
    request = SearchRequest('id:"%s"' % oid)
    # Only full objects count; other item types are filtered out.
    request.addParam("fq", 'item_type:"object"')

    rawOut = ByteArrayOutputStream()
    self.Services.indexer.search(request, rawOut)

    response = SolrResult(ByteArrayInputStream(rawOut.toByteArray()))
    hits = response.getNumFound()
    return hits != 0
def __activate__(self, context):
    """Handle an administrative 'reharvest' or 'reindex' request.

    Reads ``func``, ``oid`` and ``portalId`` from the form data, performs the
    requested action and writes a JSON object with ``status`` and ``message``
    entries to the response as plain text.  Unknown or incomplete requests,
    and requests from users who are not logged-in administrators, get HTTP
    status 500 and ``status`` stays ``"error"``.

    Args:
        context: scripting context; the entries ``response``, ``page``,
            ``Services`` and ``formData`` are read.
    """
    response = context["response"]
    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    auth = context["page"].authentication

    # Pessimistic default; only a completed action overwrites it.
    result = JsonObject()
    result.put("status", "error")
    result.put("message", "An unknown error has occurred")

    if auth.is_logged_in() and auth.is_admin():
        services = context["Services"]
        formData = context["formData"]
        func = formData.get("func")
        oid = formData.get("oid")
        portalId = formData.get("portalId")
        portalManager = services.portalManager

        if func == "reharvest":
            if oid:
                print("Reharvesting object '%s'" % oid)
                portalManager.reharvest(oid)
                result.put("status", "ok")
                # The object id is interpolated here; previously the raw
                # '%s' placeholder was sent to the client.
                result.put("message", "Object '%s' queued for reharvest" % oid)
            elif portalId:
                print(" Reharvesting view '%s'" % portalId)
                # TODO security filter
                # TODO this should loop through the whole portal,
                # not just the first page of results
                portal = portalManager.get(portalId)
                req = SearchRequest(portal.query)
                req.setParam("fq", 'item_type:"object"')
                out = ByteArrayOutputStream()
                services.indexer.search(req, out)
                solrResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
                objectIds = solrResult.getFieldList("id")
                if not objectIds.isEmpty():
                    portalManager.reharvest(objectIds)
                result.put("status", "ok")
                result.put("message", "Objects in '%s' queued for reharvest" % portalId)
            else:
                response.setStatus(500)
                result.put("message", "No object or view specified for reharvest")
        elif func == "reindex":
            if oid:
                print("Reindexing object '%s'" % oid)
                services.indexer.index(oid)
                services.indexer.commit()
                result.put("status", "ok")
                # Report the object that was reindexed (was: portalId).
                result.put("message", "Object '%s' queued for reindex" % oid)
            else:
                response.setStatus(500)
                result.put("message", "No object specified to reindex")
        else:
            response.setStatus(500)
            result.put("message", "Unknown action '%s'" % func)
    else:
        response.setStatus(500)
        result.put("message", "Only administrative users can access this API")

    writer.println(result.toString())
    writer.close()
def __search(self):
    """Run the OAI record search for the current page and manage its token.

    Builds a faceted search from the form's ``query`` (default ``*:*``),
    restricted to the portal query and to ``item_type:object``.  The page
    offset comes from the current resumption token when there is one,
    otherwise the search starts at 0 and a token for the second page is
    prepared.  The result is stored in ``self.__result``; a token that still
    points inside the result set is stored in ``self.__token`` and recorded
    in the session's ``resumptionTokenList``.
    """
    self.__result = SolrResult(None)

    portal = self.services.getPortalManager().get(self.vc("portalId"))
    recordsPerPage = portal.recordsPerPage

    query = self.vc("formData").get("query")
    if query is None or query == "":
        query = "*:*"

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", portal.facetFieldList)
    req.setParam("facet.limit", str(portal.facetCount))
    req.setParam("sort", "f_dc_title asc")

    portalQuery = portal.query
    print(" * portalQuery=%s" % portalQuery)
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", "item_type:object")

    # Work out the page offset and the token for the page after this one.
    if self.__currentToken:
        start = self.__currentToken.getStart()
        totalFound = self.__currentToken.getTotalFound()
        nextTokenStart = start + recordsPerPage
        if nextTokenStart < totalFound:
            self.__token = ResumptionToken(start=nextTokenStart,
                                           metadataPrefix=self.__metadataPrefix,
                                           sessionExpiry=self.__sessionExpiry)
    else:
        start = 0
        self.__token = ResumptionToken(start=recordsPerPage,
                                       metadataPrefix=self.__metadataPrefix,
                                       sessionExpiry=self.__sessionExpiry)
    req.setParam("start", str(start))

    print(" * oai.py: %s" % req.toString())

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # Drop the token when there is nothing (more) to page through.
    totalFound = self.__result.getNumFound()
    if totalFound == 0:
        self.__token = None
    elif self.__token:
        if self.__token.getStart() < totalFound:
            self.__token.setTotalFound(totalFound)
        else:
            self.__token = None

    # Storing the resumptionToken to session
    if self.__token:
        self.__resumptionTokenList[self.__token.getToken()] = self.__token
        # Need to know how long the server need to store this token
        self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)
class HomeData:
    """Data provider for the portal home page.

    On activation it runs four index searches (recently modified objects,
    objects owned by the current user, objects in the user's workflows and a
    total count) and exposes the results through the ``get*`` accessors.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Store the scripting context, clear the session 'fq' and search."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")

        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None

        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; print an error and return None
        when the entry is None."""
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            print("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None

    def __runSearch(self, indexer, query, filters, rows, sort=None):
        """Search for 'object' items matching ``query`` and return the raw
        response stream.

        ``filters`` are added as further ``fq`` parameters in the given
        order; ``sort`` is only set when supplied.
        """
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for fq in filters:
            req.addParam("fq", fq)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return out

    def __search(self):
        """Populate the latest / mine / workflows lists and the item count."""
        indexer = Services.getIndexer()
        # Look the portal up once; both queries come from the same object.
        portal = Services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work
        auth = self.vc("page").authentication
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = auth.is_admin()

        # Filters shared by every search, in the order they are applied.
        portalFilters = [fq for fq in (portalQuery, portalSearchQuery) if fq]
        securityFilters = []
        if not isAdmin:
            # Administrators see everything; everyone else is restricted.
            securityFilters.append(security_query)
        listFilters = portalFilters + securityFilters
        # The count search additionally carries an empty 'fq', as before.
        countFilters = portalFilters + [""] + securityFilters

        listSort = "last_modified desc, f_dc_title asc"

        out = self.__runSearch(indexer, "last_modified:[NOW-1MONTH TO *]",
                               listFilters, "10", listSort)
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))

        out = self.__runSearch(indexer, owner_query, listFilters, "10", listSort)
        self.__mine = SolrResult(ByteArrayInputStream(out.toByteArray()))

        out = self.__runSearch(indexer, 'workflow_security:"' + current_user + '"',
                               listFilters, "10", listSort)
        self.__workflows = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # rows=0: only the total number of matching items is needed.
        out = self.__runSearch(indexer, "*:*", countFilters, "0")
        self.vc("sessionState").set("fq", 'item_type:"object"')
        #sessionState.set("query", portalQuery.replace("\"", "'"))
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    def getLatest(self):
        """Results of the 'modified in the last month' search."""
        return self.__latest.getResults()

    def getMine(self):
        """Results of the 'owned by the current user' search."""
        return self.__mine.getResults()

    def getWorkflows(self):
        """Results of the 'workflow_security' search for the current user."""
        return self.__workflows.getResults()

    def getItemCount(self):
        """Number of items found by the count search."""
        return self.__result.getNumFound()
def __search(self):
    """Populate the home page's latest / mine / workflows lists and count.

    Runs four index searches restricted to ``item_type:"object"``, the
    portal's query and search query (when set) and, for non-administrators,
    the user's security filter.  Results are stored in ``self.__latest``,
    ``self.__mine``, ``self.__workflows`` and ``self.__result``; the session
    ``fq`` entry is set to ``item_type:"object"``.
    """
    indexer = Services.getIndexer()
    # Look the portal up once; both queries come from the same object.
    portal = Services.getPortalManager().get(self.vc("portalId"))
    portalQuery = portal.getQuery()
    portalSearchQuery = portal.getSearchQuery()

    # Security prep work
    auth = self.vc("page").authentication
    current_user = auth.get_username()
    security_roles = auth.get_roles_list()
    security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
    security_exceptions = 'security_exception:"' + current_user + '"'
    owner_query = 'owner:"' + current_user + '"'
    security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
    isAdmin = auth.is_admin()

    # Filters shared by every search, in the order they are applied.
    portalFilters = [fq for fq in (portalQuery, portalSearchQuery) if fq]
    securityFilters = []
    if not isAdmin:
        securityFilters.append(security_query)
    listFilters = portalFilters + securityFilters
    # The count search additionally carries an empty 'fq', as before.
    countFilters = portalFilters + [""] + securityFilters

    def runSearch(query, filters, rows, sort=None):
        # Build one request, execute it and hand back the raw output stream.
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        for fq in filters:
            req.addParam("fq", fq)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return out

    listSort = "last_modified desc, f_dc_title asc"

    out = runSearch("last_modified:[NOW-1MONTH TO *]", listFilters, "10", listSort)
    self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))

    out = runSearch(owner_query, listFilters, "10", listSort)
    self.__mine = SolrResult(ByteArrayInputStream(out.toByteArray()))

    out = runSearch('workflow_security:"' + current_user + '"', listFilters, "10", listSort)
    self.__workflows = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # rows=0: only the total number of matching items is needed.
    out = runSearch("*:*", countFilters, "0")
    self.vc("sessionState").set("fq", 'item_type:"object"')
    #sessionState.set("query", portalQuery.replace("\"", "'"))
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
class OaiData:
    """Data provider for the OAI-PMH page (session-stored resumption tokens).

    On activation the request is parsed with ``OaiPmhVerb``; for the verbs
    that list records a search is run when the requested metadata format is
    enabled for the current portal.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Parse the OAI request held in ``context`` and search if required."""
        self.systemConfig = JsonSimpleConfig()
        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]
        self.__result = None
        self.__token = None
        self.__portalName = context["page"].getPortal().getName()
        self.__enabledInAllViews = False
        self.__enabledInViews = []
        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(None, ["portal", "oai-pmh", "sessionExpiry"])

        self.__resumptionTokenList = self.sessionState.get("resumptionTokenList")
        if self.__resumptionTokenList is None:
            self.__resumptionTokenList = {}

        # Check if there's resumption token exist in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken and resumptionToken in self.__resumptionTokenList:
            self.__currentToken = self.__resumptionTokenList[resumptionToken]

        print(" * oai.py: formData=%s" % self.vc("formData"))
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(self.vc("formData"), self.__currentToken, self.sessionState)

        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            ## Only list those data if the metadata format is enabled
            prefix = self.vc("formData").get("metadataPrefix")
            if prefix is None and self.__currentToken is not None:
                prefix = self.__currentToken.getMetadataPrefix()
            if prefix is None:
                # Neither the form nor a token names a format: nothing to
                # search for (previously this dereferenced a None token).
                return
            self.__metadataPrefix = prefix

            self.__enabledInAllViews = self.systemConfig.getBoolean(False, ["portal", "oai-pmh", "metadataFormats", self.__metadataPrefix, "enabledInAllViews"])
            if self.__enabledInAllViews:
                self.__search()
            else:
                self.__enabledInViews = self.systemConfig.getStringList(["portal", "oai-pmh", "metadataFormats", self.__metadataPrefix, "enabledViews"])
                if self.__portalName in self.__enabledInViews:
                    self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; log an error and return None
        when the entry is None."""
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None

    def getVerb(self):
        """Verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Error of the parsed request, or None."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Current time as ``YYYY-MM-DDThh:mm:ssZ``.

        The trailing ``Z`` denotes UTC, so the time is taken from
        ``time.gmtime()`` rather than the server's local clock.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """The parsed ``OaiPmhVerb`` request."""
        return self.__request

    def getResult(self):
        """Search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return ``<elementName>value</elementName>`` for each value,
        concatenated; an empty string when ``values`` is empty or None.
        Values are inserted as given (no escaping is applied here)."""
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __search(self):
        """Run the record search for the current page and manage its token.

        The page offset comes from the current resumption token when there
        is one; otherwise the search starts at 0 and a token for the second
        page is prepared.  A token that still points inside the result set
        is kept in ``self.__token`` and recorded in the session.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.vc("portalId"))
        recordsPerPage = portal.recordsPerPage

        query = self.vc("formData").get("query")
        if query is None or query == "":
            query = "*:*"

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        print(" * portalQuery=%s" % portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Work out the page offset and the token for the page after this one.
        if self.__currentToken:
            start = self.__currentToken.getStart()
            totalFound = self.__currentToken.getTotalFound()
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                self.__token = ResumptionToken(start=nextTokenStart,
                                               metadataPrefix=self.__metadataPrefix,
                                               sessionExpiry=self.__sessionExpiry)
        else:
            start = 0
            self.__token = ResumptionToken(start=recordsPerPage,
                                           metadataPrefix=self.__metadataPrefix,
                                           sessionExpiry=self.__sessionExpiry)
        req.setParam("start", str(start))

        print(" * oai.py: %s" % req.toString())

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Drop the token when there is nothing (more) to page through.
        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            self.__token = None
        elif self.__token:
            if self.__token.getStart() < totalFound:
                self.__token.setTotalFound(totalFound)
            else:
                self.__token = None

        # Storing the resumptionToken to session
        if self.__token:
            self.__resumptionTokenList[self.__token.getToken()] = self.__token
            # Need to know how long the server need to store this token
            self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)

    def getToken(self):
        """Token for the next page, or None when the format is not enabled
        in this view or no further page exists."""
        if self.__enabledInAllViews or self.__portalName in self.__enabledInViews:
            return self.__token
        return None

    def getMetadataFormats(self):
        """Configured metadata formats from the system configuration."""
        return self.systemConfig.getJsonSimpleMap(["portal", "oai-pmh", "metadataFormats"])

    def encodeXml(self, string):
        """XML-escape ``string`` via ``StringEscapeUtils.escapeXml``."""
        return StringEscapeUtils.escapeXml(string)
def __search(self):
    """Run the OAI record search and maintain the resumption token.

    The query targets a single record when the form supplies an
    ``identifier`` (default-style ids are matched on ``id``, anything else
    on ``oai_identifier``), otherwise all records.  Results are limited to
    the portal query, ``item_type:object`` and the request's from/until
    dates.  On an empty result an error is set on the request.  The token
    for the following page (or None) ends up in ``self.__currentToken`` and
    is stored / updated / removed in ``self.tokensDB``.
    """
    self.__result = SolrResult(None)

    portal = self.services.getPortalManager().get(self.__portalName)
    recordsPerPage = portal.recordsPerPage

    # Resolve our identifier
    identifier = self.vc("formData").get("identifier")
    hasIdentifier = identifier is not None and identifier != ""
    query = "*:*"
    if hasIdentifier:
        defaultPrefix = "oai:fascinator.usq.edu.au:"
        if identifier.startswith(defaultPrefix):
            # A default TF2 OID: strip only the leading prefix
            query = "id:" + self.__escapeQuery(identifier[len(defaultPrefix):])
        else:
            # Or a custom OAI ID
            query = "oai_identifier:" + self.__escapeQuery(identifier)

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", portal.facetFieldList)
    req.setParam("facet.limit", str(portal.facetCount))
    req.setParam("sort", "f_dc_title asc")

    portalQuery = portal.query
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", "item_type:object")

    # Date range filter, when either bound is supplied; a missing bound
    # is left open with '*'.
    fromDate = self.__request.getFromDate()
    untilDate = self.__request.getUntilDate()
    if fromDate is not None or untilDate is not None:
        fromStr = "*"
        untilStr = "*"
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
        if untilDate is not None:
            untilStr = untilDate.isoformat() + "Z"
            # Log the value that is actually used in the query.
            self.log.debug("Until Date: '{}'", untilStr)
        queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
        self.log.debug("Date query: '{}'", queryStr)
        req.addParam("fq", queryStr)

    # Continue from an existing resumption token ...
    newToken = None
    if self.__currentToken is not None:
        start = int(self.__currentToken.getStart())
        totalFound = int(self.__currentToken.getTotalFound())
        nextTokenStart = start + recordsPerPage
        if nextTokenStart < totalFound:
            # More pages remain: reuse the token, pointing at the next page
            newToken = self.__currentToken
            newToken.resetExpiry(self.__sessionExpiry)
            newToken.setStart(nextTokenStart)
    # ... or start a new resumption token
    else:
        start = 0
        newToken = ResumptionToken(None, recordsPerPage,
                                   self.__metadataPrefix, self.__sessionExpiry)
    req.setParam("start", str(start))

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    totalFound = self.__result.getNumFound()
    if totalFound == 0:
        newToken = None
        # If an ID was requested, and not found, this is an error
        if hasIdentifier:
            self.__request.setError("idDoesNotExist", "ID: '%s' not found" % identifier)
        else:
            self.__request.setError("noRecordsMatch", "No records match this request")
    # We need to store this for NEW tokens
    elif self.__currentToken is None:
        # Assuming there are enough results to even keep the token
        if newToken.getStart() < totalFound:
            newToken.setTotalFound(totalFound)
        else:
            newToken = None
    # Check if we need to remove the resumption token
    else:
        if self.__result.getResults().size() < recordsPerPage:
            self.tokensDB.removeToken(self.__currentToken)

    # Store/update the resumption token
    if newToken is not None:
        if self.__currentToken is None:
            # Brand new token
            self.tokensDB.storeToken(newToken)
        else:
            # Or update an old token
            self.tokensDB.updateToken(newToken)

    self.__currentToken = newToken
class OaiData:
    """Data provider for the OAI-PMH page (database-stored resumption tokens).

    On activation the request is parsed with ``OaiPmhVerb``; for the verbs
    that list records a search is run when the requested metadata format is
    enabled for the selected portal ('set').
    """

    def __init__(self):
        self.tokensDB = None

    def __activate__(self, context):
        """Parse the OAI request held in ``context`` and search if required."""
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]
        self.__result = None

        # Check if the OAI request has an overriding portal ('set') to the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken is not None:
            # This could still be null
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument", "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        # data (like a search result) do so now. Everything else can be
        # handled in the templates.
        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            # Find the metadata prefix requested
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            # Fall back to the token's prefix, but only when a token exists
            # (previously a missing token was dereferenced here).
            if self.__metadataPrefix is None and self.__currentToken is not None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            # Only list records if the metadata format is enabled in this view
            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; log an error and return None
        when the entry is None."""
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None

    def isInView(self, format, view = None):
        """Return True when metadata ``format`` is enabled for ``view``.

        ``view`` defaults to the current portal name.  A None/empty format
        or a format without configuration is never in view.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True

        # Check if it is visible in this view
        allowedViews = formatConfig.getStringList(["enabledViews"])
        return view in allowedViews

    def getID(self, item):
        """OAI identifier of ``item``: its ``oai_identifier`` field, or the
        default prefix plus its ``id`` when that field is empty."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator.usq.edu.au:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        """Truthiness of the item's ``oai_deleted`` field."""
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Set of ``item``: its ``oai_set`` field, or the portal name when
        that field is empty."""
        oaiSet = item.getFirst("oai_set")
        # Fallback to the portal name
        if oaiSet is None or oaiSet == "":
            return self.__portalName
        # Use the required set
        return oaiSet

    def getVerb(self):
        """Verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Error of the parsed request, or None."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Current time as ``YYYY-MM-DDThh:mm:ssZ``.

        The trailing ``Z`` denotes UTC, so the time is taken from
        ``time.gmtime()`` rather than the server's local clock.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """The parsed ``OaiPmhVerb`` request."""
        return self.__request

    def getResult(self):
        """Search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return ``<elementName>value</elementName>`` for each value,
        concatenated; an empty string when ``values`` is empty or None.
        Values are inserted as given (no escaping is applied here)."""
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, q):
        """Backslash-escape query-syntax characters and whitespace in ``q``
        so it can be used as a literal term in a ``field:term`` query.

        Done in a single pass so inserted backslashes are not escaped again.
        """
        special = "\\+-!():^[]\"{}~*?|&;/"
        escaped = []
        for ch in q:
            if ch in special or ch.isspace():
                escaped.append("\\")
            escaped.append(ch)
        return "".join(escaped)

    def __search(self):
        """Run the record search and maintain the resumption token.

        The query targets a single record when the form supplies an
        ``identifier``, otherwise all records; results are limited to the
        portal query, ``item_type:object`` and the request's from/until
        dates.  On an empty result an error is set on the request.  The
        token for the following page (or None) ends up in
        ``self.__currentToken`` and is stored / updated / removed in
        ``self.tokensDB``.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""
        query = "*:*"
        if hasIdentifier:
            defaultPrefix = "oai:fascinator.usq.edu.au:"
            if identifier.startswith(defaultPrefix):
                # A default TF2 OID: strip only the leading prefix
                query = "id:" + self.__escapeQuery(identifier[len(defaultPrefix):])
            else:
                # Or a custom OAI ID
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Date range filter, when either bound is supplied; a missing bound
        # is left open with '*'.
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        if fromDate is not None or untilDate is not None:
            fromStr = "*"
            untilStr = "*"
            if fromDate is not None:
                fromStr = fromDate.isoformat() + "Z"
                self.log.debug("From Date: '{}'", fromStr)
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                # Log the value that is actually used in the query.
                self.log.debug("Until Date: '{}'", untilStr)
            queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)

        # Continue from an existing resumption token ...
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                # More pages remain: reuse the token for the next page
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # ... or start a new resumption token
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix, self.__sessionExpiry)
        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist", "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch", "No records match this request")
        # We need to store this for NEW tokens
        elif self.__currentToken is None:
            # Assuming there are enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        # Check if we need to remove the resumption token
        else:
            if self.__result.getResults().size() < recordsPerPage:
                self.tokensDB.removeToken(self.__currentToken)

        # Store/update the resumption token
        if newToken is not None:
            if self.__currentToken is None:
                # Brand new token
                self.tokensDB.storeToken(newToken)
            else:
                # Or update an old token
                self.tokensDB.updateToken(newToken)

        self.__currentToken = newToken

    def getToken(self):
        """Token for the next page, or None when the format is not enabled
        in this view or no further page exists."""
        if self.isInView(self.__metadataPrefix):
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Configured metadata formats; read once and cached in
        ``self.oaiConfig``."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """XML-escape ``string`` via ``StringEscapeUtils.escapeXml``."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of stored object ``oid``, or
        None when the object or the payload raises ``StorageException``."""
        try:
            storedObject = self.services.getStorage().getObject(oid)
            return storedObject.getPayload(metadataFileName)
        except StorageException:
            return None
def __search(self):
    """Run the portal search for the current request and set up paging.

    The form's ``query`` (default ``*:*``) is escaped and applied to the
    selected search field, extended with the ids of objects whose
    annotations match, and combined with the portal's search query.  Facet
    filters come from the session (session navigation) or from the request
    URI.  Non-administrators additionally get a security filter.  The
    result is stored in ``self.__result`` and ``self.__paging``.
    """
    recordsPerPage = self.__portal.recordsPerPage
    # Decode explicitly as UTF-8, like every other decode in this method;
    # the one-argument form depends on the platform default charset.
    uri = URLDecoder.decode(self.request.getAttribute("RequestURI"), "UTF-8")
    pagePath = self.__portal.getName() + "/" + self.pageName

    query = self.formData.get("query")
    if query is None or query == "":
        query = "*:*"

    if query == "*:*":
        self.__query = ""
    else:
        self.__query = query
        query = "%s:%s" % (self.__searchField, self.__escapeQuery(query))
    self.sessionState.set("query", self.__query)

    # find objects with annotations matching the query
    if query != "*:*":
        annoReq = SearchRequest(self.__escapeQuery(self.__query))
        annoReq.setParam("facet", "false")
        annoReq.setParam("rows", str(99999))
        annoReq.setParam("sort", "dateCreated asc")
        annoReq.setParam("start", str(0))
        anotarOut = ByteArrayOutputStream()
        self.services.indexer.annotateSearch(annoReq, anotarOut)
        annotations = SolrResult(ByteArrayInputStream(anotarOut.toByteArray())).getResults()
        ids = HashSet()
        for annoDoc in annotations:
            annotatesUri = annoDoc.get("annotatesUri")
            ids.add(annotatesUri)
            print("Found annotation for %s" % annotatesUri)
        # add annotation ids to query
        query += ' OR id:("' + '" OR "'.join(ids) + '")'

    portalSearchQuery = self.__portal.searchQuery
    if portalSearchQuery == "":
        portalSearchQuery = query
    elif query != "*:*":
        query += " AND " + portalSearchQuery
    else:
        query = portalSearchQuery

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", self.__portal.facetFieldList)
    req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
    req.setParam("facet.limit", str(self.__portal.facetCount))
    req.setParam("sort", self.__sortBy)

    # setup facets
    if self.__useSessionNavigation:
        action = self.formData.get("verb")
        value = self.formData.get("value")
        fq = self.sessionState.get("fq")
        if fq is not None:
            self.__pageNum = 1
            req.setParam("fq", fq)
        if action == "add_fq":
            self.__pageNum = 1
            req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "remove_fq":
            self.__pageNum = 1
            req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
        elif action == "clear_fq":
            self.__pageNum = 1
            req.removeParam("fq")
        elif action == "select-page":
            self.__pageNum = int(value)
    else:
        # Page number and facet limits are encoded in the URI after the
        # page path.
        navUri = uri[len(pagePath):]
        self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
        savedfq = self.sessionState.get("savedfq")
        limits = []
        if savedfq:
            limits.extend(savedfq)
        if fq:
            limits.extend(fq)
            self.sessionState.set("savedfq", limits)
            for q in fq:
                req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

    portalQuery = self.__portal.query
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", 'item_type:"object"')
    if req.getParams("fq"):
        self.__selected = ArrayList(req.getParams("fq"))

    if self.__useSessionNavigation:
        self.sessionState.set("fq", self.__selected)
    self.sessionState.set("searchQuery", portalSearchQuery)
    self.sessionState.set("pageNum", self.__pageNum)

    # Make sure 'fq' has already been set in the session
    # (the security filter below must not be stored with it)
    if not self.page.authentication.is_admin():
        current_user = self.page.authentication.get_username()
        security_roles = self.page.authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        req.addParam("fq", security_query)

    req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

    print(" * search.py: %s %s" % (req.toString(), self.__pageNum))

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
    self.__paging = Pagination(self.__pageNum,
                               int(self.__result.getNumFound()),
                               self.__portal.recordsPerPage)
class SearchData:
    """Search page script: builds a Solr request for the current portal view.

    On activation it reads the query, sort order and facet selections from
    the form data, the session and (when session navigation is disabled)
    the request URI, runs the search through ``Services.indexer`` and keeps
    the result plus paging information for the page template.
    """

    def __activate__(self, context):
        """Bind the page context, restore sort/paging state and run the search.

        ``context`` is indexed with the keys "Services", "page", "formData",
        "portalId", "sessionState", "request" and "pageName".
        """
        self.services = context["Services"]
        self.page = context["page"]
        self.formData = context["formData"]
        self.portalId = context["portalId"]
        self.sessionState = context["sessionState"]
        self.request = context["request"]
        self.pageName = context["pageName"]

        self.__portal = context["page"].getPortal()
        self.__useSessionNavigation = self.__portal.getBoolean(True, ["portal", "use-session-navigation"])
        self.__result = None
        if self.__useSessionNavigation:
            self.__pageNum = self.sessionState.get("pageNum", 1)
        else:
            self.__pageNum = 1
        self.__selected = ArrayList()
        self.__fqParts = []
        self.__searchField = self.formData.get("searchField", "full_text")

        self.__sortField = self.formData.get("sort-field")
        self.__sortOrder = self.formData.get("sort-order")
        if not (self.__sortField or self.__sortOrder):
            # sort not specified in the form data, fall back to the session
            self.__sortField = self.sessionState.get("sortField", "score")
            self.__sortOrder = self.sessionState.get("sortOrder", "desc")
        self.sessionState.set("sortField", self.__sortField)
        self.sessionState.set("sortOrder", self.__sortOrder)
        self.__sortBy = "%s %s" % (self.__sortField, self.__sortOrder)

        # reset the query and facet selections when changing views
        lastPortalId = self.sessionState.get("lastPortalId")
        if lastPortalId != self.portalId:
            self.sessionState.remove("fq")
            self.sessionState.remove("pageNum")
            self.sessionState.remove("sortField")
            self.sessionState.remove("sortOrder")
            self.__pageNum = 1
            self.sessionState.set("lastPortalId", self.portalId)

        self.__search()

    def usingSessionNavigation(self):
        """Return the portal's "use-session-navigation" flag read on activation."""
        # The original returned self.__restful, an attribute that is never
        # assigned in this class, so every call raised AttributeError.
        return self.__useSessionNavigation

    def getPortalName(self):
        """Return the description of the current portal."""
        return self.__portal.getDescription()

    def getSearchField(self):
        """Return the index field the user query is run against."""
        return self.__searchField

    def __search(self):
        """Build the Solr request from form/session/URI state and execute it.

        Stores the parsed result in ``self.__result`` and the paging helper
        in ``self.__paging``; also records the query, facet filters and page
        number in the session.
        """
        requireEscape = False
        recordsPerPage = self.__portal.recordsPerPage
        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        query = None
        pagePath = self.__portal.getName() + "/" + self.pageName
        if query is None or query == "":
            query = self.formData.get("query")
            requireEscape = True
        if query is None or query == "":
            query = "*:*"

        if query == "*:*":
            self.__query = ""
        else:
            self.__query = query
            if requireEscape:
                query = self.__escapeQuery(query)
            query = "%s:%s" % (self.__searchField, query)
        self.sessionState.set("query", self.__query)

        # find objects with annotations matching the query
        if query != "*:*":
            anotarQuery = self.__query
            if requireEscape:
                anotarQuery = self.__escapeQuery(anotarQuery)
            annoReq = SearchRequest(anotarQuery)
            annoReq.setParam("facet", "false")
            annoReq.setParam("rows", str(99999))
            annoReq.setParam("sort", "dateCreated asc")
            annoReq.setParam("start", str(0))
            anotarOut = ByteArrayOutputStream()
            self.services.indexer.annotateSearch(annoReq, anotarOut)
            resultForAnotar = SolrResult(ByteArrayInputStream(anotarOut.toByteArray()))
            resultForAnotar = resultForAnotar.getResults()
            ids = HashSet()
            for annoDoc in resultForAnotar:
                annotatesUri = annoDoc.get("annotatesUri")
                ids.add(annotatesUri)
                print("Found annotation for %s" % annotatesUri)
            # add annotation ids to query
            query += ' OR id:("' + '" OR "'.join(ids) + '")'

        portalSearchQuery = self.__portal.searchQuery
        if portalSearchQuery == "":
            portalSearchQuery = query
        else:
            if query != "*:*":
                query += " AND " + portalSearchQuery
            else:
                query = portalSearchQuery

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", self.__portal.facetFieldList)
        req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
        req.setParam("facet.limit", str(self.__portal.facetCount))
        req.setParam("sort", self.__sortBy)

        # setup facets
        if self.__useSessionNavigation:
            # facet selections live in the session and are changed by form verbs
            action = self.formData.get("verb")
            value = self.formData.get("value")
            fq = self.sessionState.get("fq")
            if fq is not None:
                self.__pageNum = 1
                req.setParam("fq", fq)
            if action == "add_fq":
                self.__pageNum = 1
                req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "remove_fq":
                self.__pageNum = 1
                req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "clear_fq":
                self.__pageNum = 1
                req.removeParam("fq")
            elif action == "select-page":
                self.__pageNum = int(value)
        else:
            # facet selections and page number are encoded in the URI
            # that follows "<portal name>/<page name>"
            navUri = uri[len(pagePath):]
            self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
            savedfq = self.sessionState.get("savedfq")
            limits = []
            if savedfq:
                limits.extend(savedfq)
            if fq:
                limits.extend(fq)
                self.sessionState.set("savedfq", limits)
                for q in fq:
                    req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

        portalQuery = self.__portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", 'item_type:"object"')
        if req.getParams("fq"):
            self.__selected = ArrayList(req.getParams("fq"))

        if self.__useSessionNavigation:
            self.sessionState.set("fq", self.__selected)
        self.sessionState.set("searchQuery", portalSearchQuery)
        self.sessionState.set("pageNum", self.__pageNum)

        # Make sure 'fq' has already been set in the session
        # (the security filter below must not be stored with the user's facets)
        if not self.page.authentication.is_admin():
            current_user = self.page.authentication.get_username()
            security_roles = self.page.authentication.get_roles_list()
            security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
            security_exceptions = 'security_exception:"' + current_user + '"'
            owner_query = 'owner:"' + current_user + '"'
            security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
            req.addParam("fq", security_query)

        req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

        print(" * search.py: %s %s" % (req.toString(), self.__pageNum))
        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
        if self.__result is not None:
            self.__paging = Pagination(self.__pageNum,
                                       int(self.__result.getNumFound()),
                                       self.__portal.recordsPerPage)

    def __escapeQuery(self, q):
        """Return ``q`` with every Solr/Lucene special character backslash-escaped.

        The escaped set is the one listed under "Escaping Special Characters"
        in the Lucene query parser syntax documentation.
        """
        chars = '+-&|!(){}[]^"~*?:\\'
        escaped = []
        for c in q:
            if c in chars:
                escaped.append("\\%s" % c)
            else:
                escaped.append(c)
        return "".join(escaped)

    def getQueryTime(self):
        """Return the response header's QTime value divided by 1000.0."""
        return int(self.__result.get("responseHeader/QTime")) / 1000.0

    def getPaging(self):
        """Return the Pagination object built by the last search."""
        return self.__paging

    def getResult(self):
        """Return the SolrResult of the last search."""
        return self.__result

    def getFacetField(self, key):
        """Return the portal's facet field configuration for ``key``."""
        return self.__portal.facetFields.get(key)

    def getFacetName(self, key):
        """Return the configured "label" of facet field ``key``."""
        return self.__portal.facetFields.get(key).getString(None, ["label"])

    def getFacetCounts(self, key):
        """Return the facet values for ``key``, or an empty map if unavailable.

        Facet data is only returned when session navigation is in use.
        """
        if self.__useSessionNavigation:
            facetData = self.__result.getFacets()
            if facetData is None:
                return LinkedHashMap()
            if not facetData.containsKey(key):
                return LinkedHashMap()
            return facetData.get(key).values()
        else:
            return LinkedHashMap()
        # TODO : What were these doing? Hiding file path facets unless some facets are selected?
        #if name.find("/") == -1 or self.hasSelectedFacets():
        #    values.put(name, count)

    def getFacetDisplay(self):
        """Return the portal's facet display setting."""
        return self.__portal.facetDisplay

    def hasSelectedFacets(self):
        """Return True when filters beyond the always-present ones are selected.

        The 'item_type' filter is always added, and the portal query filter
        is added when the portal has one, so those alone do not count.
        """
        return (self.__selected is not None and len(self.__selected) > 1) and \
            not (self.__portal.query in self.__selected and len(self.__selected) == 2)

    def getSelectedFacets(self):
        """Return the list of filter queries applied to the last search."""
        return self.__selected

    def isPortalQueryFacet(self, fq):
        """Return True if ``fq`` is the portal's own query filter."""
        return fq == self.__portal.query

    def isSelected(self, fq):
        """Return True if ``fq`` is one of the applied filter queries."""
        return fq in self.__selected

    def getSelectedFacetIds(self):
        """Return the MD5 hex digest of each applied filter query."""
        return [md5.new(fq).hexdigest() for fq in self.__selected]

    def getFileName(self, path):
        """Return the base name of ``path`` without its extension."""
        return os.path.splitext(os.path.basename(path))[0]

    def getFacetQuery(self, name, value):
        """Return the filter query string ``name:"value"``."""
        return '%s:"%s"' % (name, value)

    # Packaging support
    def getActiveManifestTitle(self):
        """Return the title of the active package manifest."""
        return self.__getActiveManifest().getTitle()

    def getActiveManifestId(self):
        """Return the session's "package/active/id" value."""
        return self.sessionState.get("package/active/id")

    def getSelectedItemsCount(self):
        """Return the size of the active package manifest."""
        return self.__getActiveManifest().size()

    def isSelectedForPackage(self, oid):
        """Return True if the active manifest has a node for ``oid``."""
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        return (result is not None)

    def getManifestItemTitle(self, oid, defaultValue):
        """Return the manifest node title for ``oid``, or ``defaultValue``."""
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        if result is None:
            return defaultValue
        return result.getTitle()

    def __getActiveManifest(self):
        """Return the session's active manifest, creating one if needed."""
        activeManifest = self.sessionState.get("package/active")
        if not activeManifest:
            activeManifest = Manifest(None)
            activeManifest.setTitle("New package")
            activeManifest.setViewId(self.__portal.getName())
            self.sessionState.set("package/active", activeManifest)
        return activeManifest

    def isSelectableForPackage(self, oid):
        """Return False only for the active package itself."""
        return oid != self.getActiveManifestId()

    def getSortFields(self):
        """Return the portal's configured sort fields."""
        return self.__portal.sortFields

    def getSortField(self):
        """Return the sort field in effect."""
        return self.__sortField

    def getSortOrder(self):
        """Return the sort order in effect."""
        return self.__sortOrder

    # RESTful style URL support methods
    def getPageQuery(self, page):
        """Return the URI suffix for ``page`` with the current facet limits."""
        prefix = ""
        if self.__fqParts:
            prefix = "/" + "/".join(self.__fqParts)
        suffix = ""
        if page > 1:
            suffix = "/page/%s" % page
        return prefix + suffix

    def getFacetQueryUri(self, name, value):
        """Return ``name/value`` for use in a facet URI."""
        return "%s/%s" % (name, value)

    def getFacetValue(self, facetValue):
        """Return the last "/"-separated segment of ``facetValue``."""
        return facetValue.split("/")[-1]

    def getLimitQueryWith(self, fq):
        """Return the current facet URI parts plus ``category/<fq>``."""
        limits = ArrayList(self.__fqParts)
        limits.add("category/" + fq)
        return "/".join(limits)

    def getFacetIndent(self, facetValue):
        """Return the number of "/"-separated segments in ``facetValue``."""
        return len(facetValue.split("/"))

    def getLimitQueryWithout(self, fq):
        """Return the current facet URI parts with ``category/<fq>`` removed."""
        limits = ArrayList(self.__fqParts)
        limits.remove("category/" + fq)
        if limits.isEmpty():
            return ""
        return "/".join(limits)

    def __parseUri(self, uri):
        """Parse a navigation URI into ``(page, fq, fqParts)``.

        The URI is a "/"-separated sequence of ``page/<n>`` and
        ``category/<key>/<value segments...>`` groups.  ``fq`` receives one
        ``key:"value"`` filter per category group and ``fqParts`` the
        matching ``category/<key>/<value>`` URI fragments.

        Raises ValueError if the segment after ``page`` is not an integer.
        """
        page = 1
        fq = []
        fqParts = []
        if uri != "":
            parts = uri.split("/")
            partType = None
            facetKey = None
            facetValues = None
            for part in parts:
                if partType == "page":
                    facetKey = None
                    page = int(part)
                elif partType == "category":
                    partType = "category-value"
                    facetValues = None
                    facetKey = part
                elif partType == "category-value":
                    if facetValues is None:
                        facetValues = []
                    if part in ["page", "category"]:
                        # a new group starts: flush the category collected so far
                        partType = part
                        facetPath = "/".join(facetValues)
                        fq.append('%s:"%s"' % (facetKey, facetPath))
                        fqParts.append("category/%s/%s" % (facetKey, facetPath))
                        facetKey = None
                        facetValues = None
                    else:
                        facetValues.append(URLDecoder.decode(part))
                else:
                    partType = part
            if partType == "category-value":
                # flush the trailing category; a URI ending right after the
                # key leaves facetValues as None, which used to crash join()
                facetPath = "/".join(facetValues or [])
                fq.append('%s:"%s"' % (facetKey, facetPath))
                fqParts.append("category/%s/%s" % (facetKey, facetPath))
        return page, fq, fqParts
def __search(self):
    """Run the Solr search backing an OAI-PMH response.

    Builds the query from the optional "identifier" form field, applies the
    portal filter, positions the result window from the current resumption
    token (if any), executes the search and stores the result in
    ``self.__result``.  A follow-up resumption token is kept in
    ``self.__token`` and in the session only while more records remain.
    """
    self.log.debug(" === __search()")
    self.__result = SolrResult(None)

    portal = self.services.getPortalManager().get(self.__portalName)
    recordsPerPage = portal.recordsPerPage

    # Resolve our identifier
    identifier = self.vc("formData").get("identifier")
    self.log.debug(" === ID: '{}'", identifier)
    query = "*:*"
    if identifier is not None and identifier != "":
        # A default TF2 OID
        if identifier.startswith("oai:fascinator:"):
            query = "id:" + identifier.replace("oai:fascinator:", "")
        # Or a custom OAI ID
        else:
            query = "oai_identifier:" + identifier.replace(":", "\\:")
    self.log.debug(" === QUERY: '{}'", query)

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", portal.facetFieldList)
    req.setParam("facet.limit", str(portal.facetCount))
    req.setParam("sort", "f_dc_title asc")

    portalQuery = portal.query
    self.log.debug(" * portalQuery={}", portalQuery)
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", "item_type:object")

    # Check if there's resumption token exist in the formData
    if self.__currentToken:
        # continuing a list: only issue another token if records remain
        start = self.__currentToken.getStart()
        totalFound = self.__currentToken.getTotalFound()
        nextTokenStart = start + recordsPerPage
        if nextTokenStart < totalFound:
            self.__token = ResumptionToken(start = nextTokenStart,
                                           metadataPrefix = self.__metadataPrefix,
                                           sessionExpiry = self.__sessionExpiry)
    else:
        # first page: tentatively prepare a token for the second page
        start = 0
        self.__token = ResumptionToken(start = recordsPerPage,
                                       metadataPrefix = self.__metadataPrefix,
                                       sessionExpiry = self.__sessionExpiry)

    req.setParam("start", str(start))

    # the '{}' placeholder is required, otherwise the request is not logged
    self.log.debug(" * oai.py: {}", req.toString())
    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # drop the tentative token when the result set does not reach that far
    totalFound = self.__result.getNumFound()
    if totalFound == 0:
        self.__token = None
    elif self.__token:
        if self.__token.getStart() < totalFound:
            self.__token.setTotalFound(totalFound)
        else:
            self.__token = None

    #Storing the resumptionToken to session
    if self.__token:
        self.__resumptionTokenList[self.__token.getToken()] = self.__token
        #Need to know how long the server need to store this token
        self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)
class OaiData:
    """OAI-PMH feed script: resolves the request and searches for records.

    On activation it determines the target portal ('set'), restores any
    resumption token from the session, parses the OAI-PMH verb and, for the
    record-listing verbs, runs the index search when the requested metadata
    format is enabled for the view.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Bind the page context, parse the OAI-PMH request and search if needed."""
        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.__token = None

        # Check if the OAI request has an overriding portal ('set') to the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
                self.log.debug("=== PORTAL override! : {}", self.__portalName)

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(None, ["portal", "oai-pmh", "sessionExpiry"])

        self.__resumptionTokenList = self.sessionState.get("resumptionTokenList")
        if self.__resumptionTokenList is None:
            self.__resumptionTokenList = {}

        #Check if there's resumption token exist in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken:
            if resumptionToken in self.__resumptionTokenList:
                self.__currentToken = self.__resumptionTokenList[resumptionToken]

        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(self.vc("formData"), self.__currentToken, self.sessionState, context)

        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            ## Only list those data if the metadata format is enabled
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            # Fall back to the prefix remembered by the resumption token; without
            # a token the prefix stays None and isInView() rejects it below.
            if self.__metadataPrefix is None and self.__currentToken is not None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; log an error and return None if unset."""
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def isInView(self, format, view = None):
        """Return True if metadata ``format`` is enabled for ``view``.

        ``view`` defaults to the portal resolved on activation.  A format is
        enabled when its config has "enabledInAllViews" set or lists the
        view under "enabledViews".
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        allViews = formatConfig.getBoolean(False, ["enabledInAllViews"])
        if allViews:
            self.log.debug("=== Format '{}' is in all views", format)
            return True
        # Check if it is visible in this view
        else:
            allowedViews = formatConfig.getStringList(["enabledViews"])
            if view in allowedViews:
                self.log.debug("=== Format '{}' is in view '{}'", format, view)
                return True

        # Rejection
        self.log.debug("=== Format '{}' is NOT in view '{}'", format, view)
        return False

    def getID(self, item):
        """Return the item's "oai_identifier", or "oai:fascinator:<id>" if unset."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def getSet(self, item):
        """Return the item's "oai_set", or the current portal name if unset."""
        oaiSet = item.getFirst("oai_set")
        # Fallback to the portal name
        if oaiSet is None or oaiSet == "":
            return self.__portalName
        # Use the required set
        return oaiSet

    def getVerb(self):
        """Return the verb of the parsed OAI-PMH request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error of the parsed OAI-PMH request."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current UTC time as ``YYYY-MM-DDThh:mm:ssZ``."""
        # The trailing 'Z' designates UTC, so format gmtime(); strftime()
        # without a time tuple would format the server's local time.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the OaiPmhVerb object created on activation."""
        return self.__request

    def getResult(self):
        """Return the SolrResult of the search, or None if none was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one ``<elementName>value</elementName>`` string per value, concatenated.

        Values are inserted as given (no XML escaping is applied here).
        Returns "" when ``values`` is empty or None.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __search(self):
        """Run the Solr search backing the OAI-PMH response.

        Builds the query from the optional "identifier" form field, applies
        the portal filter, positions the result window from the current
        resumption token (if any), executes the search and stores the result
        in ``self.__result``.  A follow-up resumption token is kept in
        ``self.__token`` and in the session only while more records remain.
        """
        self.log.debug(" === __search()")
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        self.log.debug(" === ID: '{}'", identifier)
        query = "*:*"
        if identifier is not None and identifier != "":
            # A default TF2 OID
            if identifier.startswith("oai:fascinator:"):
                query = "id:" + identifier.replace("oai:fascinator:", "")
            # Or a custom OAI ID
            else:
                query = "oai_identifier:" + identifier.replace(":", "\\:")
        self.log.debug(" === QUERY: '{}'", query)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        self.log.debug(" * portalQuery={}", portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Check if there's resumption token exist in the formData
        if self.__currentToken:
            # continuing a list: only issue another token if records remain
            start = self.__currentToken.getStart()
            totalFound = self.__currentToken.getTotalFound()
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                self.__token = ResumptionToken(start = nextTokenStart,
                                               metadataPrefix = self.__metadataPrefix,
                                               sessionExpiry = self.__sessionExpiry)
        else:
            # first page: tentatively prepare a token for the second page
            start = 0
            self.__token = ResumptionToken(start = recordsPerPage,
                                           metadataPrefix = self.__metadataPrefix,
                                           sessionExpiry = self.__sessionExpiry)

        req.setParam("start", str(start))

        # the '{}' placeholder is required, otherwise the request is not logged
        self.log.debug(" * oai.py: {}", req.toString())
        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # drop the tentative token when the result set does not reach that far
        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            self.__token = None
        elif self.__token:
            if self.__token.getStart() < totalFound:
                self.__token.setTotalFound(totalFound)
            else:
                self.__token = None

        #Storing the resumptionToken to session
        if self.__token:
            self.__resumptionTokenList[self.__token.getToken()] = self.__token
            #Need to know how long the server need to store this token
            self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)

    def getToken(self):
        """Return the next resumption token, or None if the format is not in view."""
        if self.isInView(self.__metadataPrefix):
            return self.__token
        return None

    def getMetadataFormats(self):
        """Return the "portal/oai-pmh/metadataFormats" config map, loading it once."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return ``string`` escaped with StringEscapeUtils.escapeXml."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of object ``oid``.

        Returns None if either the object or the payload lookup raises
        StorageException.
        """
        # First get the Object from storage
        try:
            digitalObject = self.services.getStorage().getObject(oid)
        except StorageException:
            return None

        # Check whether the payload exists
        try:
            return digitalObject.getPayload(metadataFileName)
        except StorageException:
            return None