class OaiData:
    """Page data for an OAI-PMH endpoint rendered through Velocity templates.

    ``__activate__`` validates the incoming request, resolves the portal
    ('set') and any resumption token, and runs the index search for the verbs
    that need one. The remaining public methods are accessors and small
    formatting helpers; everything else is left to the templates.
    """

    # Prefix of the fallback OAI identifier built from an item's "id" field
    _DEFAULT_ID_PREFIX = "oai:fascinator.usq.edu.au:"

    # Characters that carry special meaning in Lucene/Solr query syntax
    _QUERY_SPECIAL_CHARS = "\\+-&|!(){}[]^\"~*?:/"

    def __init__(self):
        # Created on the first activation and then kept on the instance
        self.tokensDB = None

    def __activate__(self, context):
        """Parse the request found in the Velocity context and prepare data.

        Reads the "Services", "log", "sessionState", "portalDir" and "page"
        entries of ``context`` directly, and "formData"/"request" through
        ``vc()``. Errors are recorded on the request object (see
        ``getError()``); a search is only run for GetRecord, ListIdentifiers
        and ListRecords when the metadata format is enabled for the portal.
        """
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]
        self.__result = None

        formData = self.vc("formData")

        # Check if the OAI request has an overriding portal ('set') in the URL
        paramSet = formData.get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken is not None:
            # The database lookup may still come back with None
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument",
                                    "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        # data (like a search result) do so now. Everything else can be
        # handled in the templates.
        if self.getError() is not None:
            return
        if self.getVerb() not in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            return

        # Find the metadata prefix requested, falling back to the one stored
        # with the resumption token
        prefix = formData.get("metadataPrefix")
        if prefix is None:
            if self.__currentToken is None:
                # Neither source is available: report it rather than failing
                # on a None token
                self.__request.setError(
                    "badArgument",
                    "metadataPrefix is required when no valid "
                    "resumptionToken is supplied")
                return
            prefix = self.__currentToken.getMetadataPrefix()
        self.__metadataPrefix = prefix

        # Only list records if the metadata format is enabled in this view
        if self.isInView(prefix):
            self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the Velocity context entry ``index``.

        Logs an error and returns None when the entry is None.
        """
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def isInView(self, format, view = None):
        """Tell whether metadata format ``format`` is enabled for ``view``.

        ``view`` defaults to the portal selected for the current request.
        Returns False for a missing/empty format or one without configuration.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True

        # Otherwise it must be listed for this particular view
        return view in formatConfig.getStringList(["enabledViews"])

    def getID(self, item):
        """Return the item's "oai_identifier", or a default built from "id"."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return self._DEFAULT_ID_PREFIX + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        """Return the truthiness of the item's "oai_deleted" field."""
        # NOTE(review): any non-empty value (including the string "false")
        # counts as deleted here - confirm what the index actually stores.
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Return the item's "oai_set", or the current portal name if unset."""
        setName = item.getFirst("oai_set")
        # Fallback to the portal name
        if setName is None or setName == "":
            return self.__portalName
        # Use the required set
        return setName

    def getVerb(self):
        """Return the verb reported by the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error recorded on the parsed request (None if none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp, e.g. 2011-01-31T12:00:00Z."""
        # strftime() defaults to local time; the trailing 'Z' designates UTC,
        # so the UTC time has to be passed explicitly.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the request object built during activation."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one ``<elementName>value</elementName>`` string per value.

        Values are inserted as-is (no XML escaping is applied here). Returns
        an empty string when ``values`` is empty or None.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, text):
        """Backslash-escape Lucene/Solr query syntax characters in ``text``."""
        special = self._QUERY_SPECIAL_CHARS
        pieces = []
        for char in text:
            if char in special:
                pieces.append("\\" + char)
            else:
                pieces.append(char)
        return "".join(pieces)

    def __dateFilter(self, fromDate, untilDate):
        """Build the "last_modified" range filter, or None without any date.

        A missing bound is left open ('*'). Bounds are formatted with
        ``isoformat()`` plus a trailing 'Z'.
        """
        if fromDate is None and untilDate is None:
            return None
        fromStr = "*"
        untilStr = "*"
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
        if untilDate is not None:
            untilStr = untilDate.isoformat() + "Z"
            self.log.debug("Until Date: '{}'", untilStr)
        queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
        self.log.debug("Date query: '{}'", queryStr)
        return queryStr

    def __search(self):
        """Run the index search for the request and manage resumption tokens.

        Stores the result for ``getResult()``, records "idDoesNotExist" or
        "noRecordsMatch" on the request when nothing is found, and stores,
        updates or removes the resumption token in the tokens database.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""
        query = "*:*"
        if hasIdentifier:
            prefix = self._DEFAULT_ID_PREFIX
            if identifier.startswith(prefix):
                # A default TF2 OID
                query = "id:" + self.__escapeQuery(identifier[len(prefix):])
            else:
                # Or a custom OAI ID
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Restrict by modification date when a from/until date is supplied
        dateQuery = self.__dateFilter(self.__request.getFromDate(),
                                      self.__request.getUntilDate())
        if dateQuery is not None:
            req.addParam("fq", dateQuery)

        # Work out the page to fetch and the token for the page after it
        newToken = None
        if self.__currentToken is not None:
            # Continue from an existing resumption token
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        else:
            # or start a new resumption token
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix,
                                       self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist",
                                        "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch",
                                        "No records match this request")
        elif self.__currentToken is None:
            # We need to store the total for NEW tokens, assuming there are
            # enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        else:
            # Check if we need to remove the resumption token
            if self.__result.getResults().size() < recordsPerPage:
                self.tokensDB.removeToken(self.__currentToken)

        # Store/update the resumption token
        if newToken is not None:
            if self.__currentToken is None:
                # Brand new token
                self.tokensDB.storeToken(newToken)
            else:
                # Or update an old token
                self.tokensDB.updateToken(newToken)
        self.__currentToken = newToken

    def getToken(self):
        """Return the resumption token for the next page, if any.

        Returns None when the requested metadata format is not enabled for
        the current portal.
        """
        if self.isInView(self.__metadataPrefix):
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the "portal/oai-pmh/metadataFormats" config map (cached)."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return ``string`` escaped by StringEscapeUtils.escapeXml()."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of stored object ``oid``.

        Returns None when storage raises a StorageException for either the
        object or the payload lookup.
        """
        try:
            # First get the object from storage, then check for the payload
            storedObject = self.services.getStorage().getObject(oid)
            return storedObject.getPayload(metadataFileName)
        except StorageException:
            return None
class OaiData:
    """Page data for an OAI-PMH endpoint rendered through Velocity templates.

    ``__activate__`` parses the request, looks up any resumption token kept in
    the session state and runs the index search for the verbs that need one.
    The remaining public methods are accessors and small formatting helpers
    for the templates.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Parse the request found in the Velocity context and prepare data.

        Reads the "Services", "log", "sessionState", "portalDir" and "page"
        entries of ``context`` directly, and "formData"/"request" through
        ``vc()``. A search is only run for GetRecord, ListIdentifiers and
        ListRecords when the metadata format is enabled for the portal.
        """
        self.systemConfig = JsonSimpleConfig()
        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.__token = None
        self.__portalName = context["page"].getPortal().getName()
        self.__enabledInAllViews = False
        self.__enabledInViews = []
        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Tokens issued earlier are kept in the session state
        self.__resumptionTokenList = self.sessionState.get("resumptionTokenList")
        if self.__resumptionTokenList is None:
            self.__resumptionTokenList = {}

        formData = self.vc("formData")

        # Check if there's a known resumption token in the formData
        self.__currentToken = None
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken and resumptionToken in self.__resumptionTokenList:
            self.__currentToken = self.__resumptionTokenList[resumptionToken]

        self.log.debug(" * oai.py: formData={}", formData)
        self.vc("request").setAttribute("Content-Type", "text/xml")

        self.__request = OaiPmhVerb(formData, self.__currentToken,
                                    self.sessionState)

        if self.getError() is not None:
            return
        if self.getVerb() not in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            return

        # Only list those data if the metadata format is enabled
        self.__metadataPrefix = formData.get("metadataPrefix")
        if self.__metadataPrefix is None:
            # NOTE(review): this assumes OaiPmhVerb has already rejected
            # requests carrying neither a metadataPrefix nor a known
            # resumption token; otherwise the token is None here - confirm.
            self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

        formatPath = ["portal", "oai-pmh", "metadataFormats",
                      self.__metadataPrefix]
        self.__enabledInAllViews = self.systemConfig.getBoolean(
            False, formatPath + ["enabledInAllViews"])
        if self.__enabledInAllViews:
            self.__search()
        else:
            self.__enabledInViews = self.systemConfig.getStringList(
                formatPath + ["enabledViews"])
            if self.__portalName in self.__enabledInViews:
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the Velocity context entry ``index``.

        Logs an error and returns None when the entry is None.
        """
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def getVerb(self):
        """Return the verb reported by the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error recorded on the parsed request (None if none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp, e.g. 2011-01-31T12:00:00Z."""
        # strftime() defaults to local time; the trailing 'Z' designates UTC,
        # so the UTC time has to be passed explicitly.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the request object built during activation."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one ``<elementName>value</elementName>`` string per value.

        Values are inserted as-is (no XML escaping is applied here). Returns
        an empty string when ``values`` is empty or None.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __search(self):
        """Run the index search for the request and issue the next token.

        Stores the result for ``getResult()`` and, when more records remain,
        keeps a new resumption token in the session state.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.vc("portalId"))
        recordsPerPage = portal.recordsPerPage

        query = self.vc("formData").get("query")
        if query is None or query == "":
            query = "*:*"

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        self.log.debug(" * portalQuery={}", portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Work out the page to fetch and the token for the page after it
        if self.__currentToken:
            # Continue from the resumption token supplied in the formData
            start = self.__currentToken.getStart()
            totalFound = self.__currentToken.getTotalFound()
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                self.__token = ResumptionToken(
                    start=nextTokenStart,
                    metadataPrefix=self.__metadataPrefix,
                    sessionExpiry=self.__sessionExpiry)
        else:
            # First page: the next token starts right after it
            start = 0
            self.__token = ResumptionToken(
                start=recordsPerPage,
                metadataPrefix=self.__metadataPrefix,
                sessionExpiry=self.__sessionExpiry)

        req.setParam("start", str(start))

        self.log.debug(" * oai.py: {}", req.toString())

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Drop the token when there is nothing (more) to page through
        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            self.__token = None
        elif self.__token:
            if self.__token.getStart() < totalFound:
                self.__token.setTotalFound(totalFound)
            else:
                self.__token = None

        # Storing the resumptionToken to session
        if self.__token:
            self.__resumptionTokenList[self.__token.getToken()] = self.__token
            # Need to know how long the server need to store this token
            self.sessionState.set("resumptionTokenList",
                                  self.__resumptionTokenList)

    def getToken(self):
        """Return the resumption token for the next page, if any.

        Returns None when the requested metadata format is not enabled for
        the current portal.
        """
        if self.__enabledInAllViews or self.__portalName in self.__enabledInViews:
            return self.__token
        return None

    def getMetadataFormats(self):
        """Return the "portal/oai-pmh/metadataFormats" config map."""
        return self.systemConfig.getJsonSimpleMap(
            ["portal", "oai-pmh", "metadataFormats"])

    def encodeXml(self, string):
        """Return ``string`` escaped by StringEscapeUtils.escapeXml()."""
        return StringEscapeUtils.escapeXml(string)
class OaiData:
    """Page data for an OAI-PMH endpoint rendered through Velocity templates.

    ``__activate__`` parses the request, resolves the portal ('set') and any
    resumption token kept in the session state, and runs the index search for
    the verbs that need one. The remaining public methods are accessors and
    small formatting helpers for the templates.
    """

    # Prefix of the fallback OAI identifier built from an item's "id" field
    _DEFAULT_ID_PREFIX = "oai:fascinator:"

    def __init__(self):
        pass

    def __activate__(self, context):
        """Parse the request found in the Velocity context and prepare data.

        Reads the "Services", "log", "sessionState", "portalDir" and "page"
        entries of ``context`` directly, and "formData"/"request" through
        ``vc()``. A search is only run for GetRecord, ListIdentifiers and
        ListRecords when the metadata format is enabled for the portal.
        """
        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.__token = None

        formData = self.vc("formData")

        # Check if the OAI request has an overriding portal ('set') in the URL
        paramSet = formData.get("set")
        self.__portalName = context["page"].getPortal().getName()
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
                self.log.debug("=== PORTAL override! : {}", self.__portalName)

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Tokens issued earlier are kept in the session state
        self.__resumptionTokenList = self.sessionState.get("resumptionTokenList")
        if self.__resumptionTokenList is None:
            self.__resumptionTokenList = {}

        # Check if there's a known resumption token in the formData
        self.__currentToken = None
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken and resumptionToken in self.__resumptionTokenList:
            self.__currentToken = self.__resumptionTokenList[resumptionToken]

        self.vc("request").setAttribute("Content-Type", "text/xml")

        self.__request = OaiPmhVerb(formData, self.__currentToken,
                                    self.sessionState, context)

        if self.getError() is not None:
            return
        if self.getVerb() not in ["GetRecord", "ListIdentifiers", "ListRecords"]:
            return

        # Only list those data if the metadata format is enabled
        self.__metadataPrefix = formData.get("metadataPrefix")
        if self.__metadataPrefix is None:
            # NOTE(review): this assumes OaiPmhVerb has already rejected
            # requests carrying neither a metadataPrefix nor a known
            # resumption token; otherwise the token is None here - confirm.
            self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

        if self.isInView(self.__metadataPrefix):
            self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the Velocity context entry ``index``.

        Logs an error and returns None when the entry is None.
        """
        entry = self.velocityContext[index]
        if entry is not None:
            return entry
        self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return None

    def isInView(self, format, view = None):
        """Tell whether metadata format ``format`` is enabled for ``view``.

        ``view`` defaults to the portal selected for the current request.
        Returns False for a missing/empty format or one without configuration.
        """
        # Sanity check
        if format is None or format == "":
            return False

        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False

        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            self.log.debug("=== Format '{}' is in all views", format)
            return True

        # Check if it is visible in this view
        if view in formatConfig.getStringList(["enabledViews"]):
            self.log.debug("=== Format '{}' is in view '{}'", format, view)
            return True

        # Rejection
        self.log.debug("=== Format '{}' is NOT in view '{}'", format, view)
        return False

    def getID(self, item):
        """Return the item's "oai_identifier", or a default built from "id"."""
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return self._DEFAULT_ID_PREFIX + item.getFirst("id")
        # Use the indexed value
        return identifier

    def getSet(self, item):
        """Return the item's "oai_set", or the current portal name if unset."""
        setName = item.getFirst("oai_set")
        # Fallback to the portal name
        if setName is None or setName == "":
            return self.__portalName
        # Use the required set
        return setName

    def getVerb(self):
        """Return the verb reported by the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error recorded on the parsed request (None if none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp, e.g. 2011-01-31T12:00:00Z."""
        # strftime() defaults to local time; the trailing 'Z' designates UTC,
        # so the UTC time has to be passed explicitly.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the request object built during activation."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Return one ``<elementName>value</elementName>`` string per value.

        Values are inserted as-is (no XML escaping is applied here). Returns
        an empty string when ``values`` is empty or None.
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __search(self):
        """Run the index search for the request and issue the next token.

        Stores the result for ``getResult()`` and, when more records remain,
        keeps a new resumption token in the session state.
        """
        self.log.debug(" === __search()")
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        self.log.debug(" === ID: '{}'", identifier)
        query = "*:*"
        if identifier is not None and identifier != "":
            prefix = self._DEFAULT_ID_PREFIX
            if identifier.startswith(prefix):
                # A default TF2 OID
                query = "id:" + identifier.replace(prefix, "")
            else:
                # Or a custom OAI ID; ':' must be escaped in the query syntax
                query = "oai_identifier:" + identifier.replace(":", "\\:")
        self.log.debug(" === QUERY: '{}'", query)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        self.log.debug(" * portalQuery={}", portalQuery)
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Work out the page to fetch and the token for the page after it
        if self.__currentToken:
            # Continue from the resumption token supplied in the formData
            start = self.__currentToken.getStart()
            totalFound = self.__currentToken.getTotalFound()
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                self.__token = ResumptionToken(
                    start = nextTokenStart,
                    metadataPrefix = self.__metadataPrefix,
                    sessionExpiry = self.__sessionExpiry)
        else:
            # First page: the next token starts right after it
            start = 0
            self.__token = ResumptionToken(
                start = recordsPerPage,
                metadataPrefix = self.__metadataPrefix,
                sessionExpiry = self.__sessionExpiry)

        req.setParam("start", str(start))

        # The '{}' placeholder is required for the argument to be logged
        self.log.debug(" * oai.py: {}", req.toString())

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        # Drop the token when there is nothing (more) to page through
        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            self.__token = None
        elif self.__token:
            if self.__token.getStart() < totalFound:
                self.__token.setTotalFound(totalFound)
            else:
                self.__token = None

        # Storing the resumptionToken to session
        if self.__token:
            self.__resumptionTokenList[self.__token.getToken()] = self.__token
            # Need to know how long the server need to store this token
            self.sessionState.set("resumptionTokenList",
                                  self.__resumptionTokenList)

    def getToken(self):
        """Return the resumption token for the next page, if any.

        Returns None when the requested metadata format is not enabled for
        the current portal.
        """
        if self.isInView(self.__metadataPrefix):
            return self.__token
        return None

    def getMetadataFormats(self):
        """Return the "portal/oai-pmh/metadataFormats" config map (cached)."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return ``string`` escaped by StringEscapeUtils.escapeXml()."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of stored object ``oid``.

        Returns None when storage raises a StorageException for either the
        object or the payload lookup.
        """
        try:
            # First get the object from storage, then check for the payload
            storedObject = self.services.getStorage().getObject(oid)
            return storedObject.getPayload(metadataFileName)
        except StorageException:
            return None