Beispiel #1
0
class OaiData:
    """Page script that prepares the data for an OAI-PMH response.

    On activation the request is parsed into an ``OaiPmhVerb``; for the verbs
    that need records (GetRecord, ListIdentifiers, ListRecords) an index
    search is run and paged through with resumption tokens kept in a
    ``TokensDatabase``. The remaining accessors are used to read the outcome.
    """

    # Prefix of the identifiers handed out for records that carry no
    # 'oai_identifier' value of their own in the index.
    _DEFAULT_ID_PREFIX = "oai:fascinator.usq.edu.au:"

    def __init__(self):
        # Created on first activation because it needs the request context.
        self.tokensDB = None

    def __activate__(self, context):
        """Parse the current request and, if required, run the search.

        ``context`` is the scripting context; the entries read from it are
        "Services", "log", "sessionState", "portalDir", "page", "formData"
        and "request".
        """
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None

        formData = self.vc("formData")

        # Check if the OAI request has an overriding portal ('set') in the URL
        paramSet = formData.get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            if self.vc("page").getPortals().keySet().contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = formData.get("resumptionToken")
        if resumptionToken is not None:
            # This could still be None
            self.__currentToken = self.tokensDB.getToken(resumptionToken)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument", "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        #  data (like a search result) do so now. Everything else can be
        #  handled in the templates.
        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:

            # Find the metadata prefix requested; fall back to the one stored
            # in the resumption token, but only when there is a token.
            self.__metadataPrefix = formData.get("metadataPrefix")
            if self.__metadataPrefix is None and self.__currentToken is not None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            # Only list records if the metadata format is enabled in this view
            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; log an error if it is None."""
        entry = self.velocityContext[index]
        if entry is None:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
        return entry

    def isInView(self, format, view=None):
        """Tell whether metadata format ``format`` is enabled in ``view``.

        ``view`` defaults to the portal selected for the current request.
        Returns False for an empty/unknown format.
        """
        # Sanity check
        if format is None or format == "":
            return False
        # Default to current portal
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False
        # Is it visible everywhere?
        if formatConfig.getBoolean(False, ["enabledInAllViews"]):
            return True
        # Otherwise it must be listed for this view explicitly
        allowedViews = formatConfig.getStringList(["enabledViews"])
        return allowedViews is not None and view in allowedViews

    def getID(self, item):
        """Return the OAI identifier of ``item``.

        Uses the indexed 'oai_identifier' when present, otherwise the default
        prefix followed by the item's 'id'.
        """
        identifier = item.getFirst("oai_identifier")
        if identifier is None or identifier == "":
            return self._DEFAULT_ID_PREFIX + item.getFirst("id")
        return identifier

    def isDeleted(self, item):
        """Return the truthiness of the item's 'oai_deleted' value."""
        # NOTE(review): any non-empty value (including the text "false")
        # counts as deleted here - confirm what the indexer stores.
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        """Return the item's 'oai_set', or the current portal name if unset."""
        setName = item.getFirst("oai_set")
        if setName is None or setName == "":
            return self.__portalName
        return setName

    def getVerb(self):
        """Return the verb of the parsed request."""
        return self.getRequest().getVerb()

    def getError(self):
        """Return the error recorded on the parsed request (None if none)."""
        return self.getRequest().getError()

    def getResponseDate(self):
        """Return the current time as a UTC timestamp ending in 'Z'."""
        # The trailing 'Z' designates UTC, so format gmtime, not local time.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    def getRequest(self):
        """Return the parsed request object."""
        return self.__request

    def getResult(self):
        """Return the search result, or None when no search was run."""
        return self.__result

    def getElement(self, elementName, values):
        """Wrap each entry of ``values`` in an ``elementName`` XML element.

        Returns the concatenated elements, or "" when ``values`` is empty.
        Values are inserted as-is (no XML escaping is applied here).
        """
        if not values:
            return ""
        return "".join(["<%s>%s</%s>" % (elementName, value, elementName)
                        for value in values])

    def __escapeQuery(self, q):
        """Backslash-escape the Lucene query syntax characters in ``q``."""
        special = "+-&|!(){}[]^\"~*?:\\/"
        escaped = []
        for ch in q:
            if ch in special:
                escaped.append("\\" + ch)
            else:
                escaped.append(ch)
        return "".join(escaped)

    def __search(self):
        """Run the index search for the request and maintain the token.

        Sets the request error 'idDoesNotExist' / 'noRecordsMatch' when
        nothing is found, and stores, updates or removes the resumption token
        depending on whether further pages remain.
        """
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        identifier = self.vc("formData").get("identifier")
        hasIdentifier = identifier is not None and identifier != ""
        query = "*:*"
        if hasIdentifier:
            if identifier.startswith(self._DEFAULT_ID_PREFIX):
                # A default identifier: search on the plain object id
                objectId = identifier.replace(self._DEFAULT_ID_PREFIX, "")
                query = "id:" + self.__escapeQuery(objectId)
            else:
                # Or a custom OAI ID
                query = "oai_identifier:" + self.__escapeQuery(identifier)

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Optional date range; an open end is expressed as '*'
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        if fromDate is not None or untilDate is not None:
            fromStr = "*"
            untilStr = "*"
            if fromDate is not None:
                fromStr = fromDate.isoformat() + "Z"
                self.log.debug("From Date: '{}'", fromStr)
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilStr)
            queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)

        # Continue from an existing resumption token...
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # ... or start a new one
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage,
                                       self.__metadataPrefix, self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if hasIdentifier:
                self.__request.setError("idDoesNotExist", "ID: '%s' not found" % identifier)
            else:
                self.__request.setError("noRecordsMatch", "No records match this request")

        # We need to store this for NEW tokens
        elif self.__currentToken is None:
            # Assuming there are enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        # Check if we need to remove the resumption token
        else:
            # newToken is None when the token's own count says this was the
            # last page; that also covers a final page that is exactly full,
            # which the row-count check alone would miss.
            if newToken is None or \
                    self.__result.getResults().size() < recordsPerPage:
                self.tokensDB.removeToken(self.__currentToken)

        # Store/update the resumption token
        if newToken is not None:
            # Brand new token
            if self.__currentToken is None:
                self.tokensDB.storeToken(newToken)
            # Or update an old token
            else:
                self.tokensDB.updateToken(newToken)
            self.__currentToken = newToken

    def getToken(self):
        """Return the current resumption token, or None when the requested
        metadata format is not enabled in this view."""
        if self.isInView(self.__metadataPrefix):
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        """Return the configured metadata formats (read once, then cached)."""
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        """Return ``string`` escaped for inclusion in XML."""
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        """Return payload ``metadataFileName`` of object ``oid`` from storage.

        Returns None when either the object or the payload cannot be read.
        """
        try:
            digitalObject = self.services.getStorage().getObject(oid)
            return digitalObject.getPayload(metadataFileName)
        except StorageException:
            return None
Beispiel #2
0
class HomeData:
    """Page script that collects the record lists shown on the home page.

    On activation it runs four index searches (recently modified, owned by
    the current user, workflow items of the current user, and a total count)
    and keeps the results for the accessor methods.
    """

    def __init__(self):
        pass

    def __activate__(self, context):
        """Reset the session's facet filter and run the searches."""
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        """Return the context entry ``index``; report it if it is None."""
        entry = self.velocityContext[index]
        if entry is None:
            # Single parenthesised argument: prints the same text whether
            # 'print' is a statement or a function.
            print("ERROR: Requested context entry '" + index + "' doesn't exist")
        return entry

    def __escapePhrase(self, text):
        """Escape backslashes and double quotes so ``text`` can be placed
        between double quotes in a query."""
        return text.replace("\\", "\\\\").replace('"', '\\"')

    def __runSearch(self, indexer, query, filters, rows, sort=None):
        """Run one search and return its SolrResult.

        ``filters`` is a non-empty list of filter queries, ``rows`` the row
        limit as a string, ``sort`` an optional sort specification.
        """
        req = SearchRequest(query)
        req.setParam("fq", filters[0])
        for extraFilter in filters[1:]:
            req.addParam("fq", extraFilter)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __search(self):
        """Populate the latest / mine / workflows / count results."""
        indexer = Services.getIndexer()
        portal = Services.getPortalManager().get(self.vc("portalId"))
        portalQuery = portal.getQuery()
        portalSearchQuery = portal.getSearchQuery()

        # Security prep work; values are escaped because they end up inside
        # quoted phrases of the query string.
        authentication = self.vc("page").authentication
        current_user = self.__escapePhrase(authentication.get_username())
        security_roles = [self.__escapePhrase(role)
                          for role in authentication.get_roles_list()]
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = authentication.is_admin()

        # The same filter queries apply to every search below
        filters = ['item_type:"object"']
        if portalQuery:
            filters.append(portalQuery)
        if portalSearchQuery:
            filters.append(portalSearchQuery)
        if not isAdmin:
            filters.append(security_query)

        newestFirst = "last_modified desc, f_dc_title asc"
        self.__latest = self.__runSearch(
            indexer, "last_modified:[NOW-1MONTH TO *]", filters, "10", newestFirst)
        self.__mine = self.__runSearch(
            indexer, owner_query, filters, "10", newestFirst)
        self.__workflows = self.__runSearch(
            indexer, 'workflow_security:"' + current_user + '"', filters, "10", newestFirst)

        # Zero rows: only the number of matches is of interest here
        self.__result = self.__runSearch(indexer, "*:*", filters, "0")

        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        """Return the records modified within the last month."""
        return self.__latest.getResults()

    def getMine(self):
        """Return the records owned by the current user."""
        return self.__mine.getResults()

    def getWorkflows(self):
        """Return the records matching the user's workflow security."""
        return self.__workflows.getResults()

    def getItemCount(self):
        """Return the number of records found by the count search."""
        return self.__result.getNumFound()
 def __search(self):
     """Build the search request for the current page and run it.

     Reads the 'query' form value, adds the ids of objects whose annotations
     match it, combines the result with the portal's search query, applies
     facet filters (kept in the session or taken from the request URI) and a
     security filter for non-admin users, then stores the result and its
     pagination on the instance.
     """
     recordsPerPage = self.__portal.recordsPerPage
     # Decode with an explicit charset, like the other decode calls below
     uri = URLDecoder.decode(self.request.getAttribute("RequestURI"), "UTF-8")
     pagePath = self.__portal.getName() + "/" + self.pageName

     query = self.formData.get("query")
     if query is None or query == "":
         query = "*:*"

     escapedQuery = None
     if query == "*:*":
         self.__query = ""
     else:
         self.__query = query
         escapedQuery = self.__escapeQuery(query)
         query = "%s:%s" % (self.__searchField, escapedQuery)
     self.sessionState.set("query", self.__query)

     # find objects with annotations matching the query
     if query != "*:*":
         annoReq = SearchRequest(escapedQuery)
         annoReq.setParam("facet", "false")
         annoReq.setParam("rows", str(99999))
         annoReq.setParam("sort", "dateCreated asc")
         annoReq.setParam("start", str(0))
         anotarOut = ByteArrayOutputStream()
         self.services.indexer.annotateSearch(annoReq, anotarOut)
         annotated = SolrResult(ByteArrayInputStream(anotarOut.toByteArray()))
         ids = HashSet()
         for annoDoc in annotated.getResults():
             annotatesUri = annoDoc.get("annotatesUri")
             if annotatesUri is None:
                 # Nothing to link to; a None would also break the join below
                 continue
             ids.add(annotatesUri)
             print("Found annotation for %s" % annotatesUri)
         # add annotation ids to query, but only when there are any
         if not ids.isEmpty():
             query += ' OR id:("' + '" OR "'.join(ids) + '")'

     portalSearchQuery = self.__portal.searchQuery
     if not portalSearchQuery:
         portalSearchQuery = query
     elif query != "*:*":
         # Group both sides so the portal restriction applies to the whole
         # user query, including the annotation id clause.
         query = "(%s) AND (%s)" % (query, portalSearchQuery)
     else:
         query = portalSearchQuery

     req = SearchRequest(query)
     req.setParam("facet", "true")
     req.setParam("rows", str(recordsPerPage))
     req.setParam("facet.field", self.__portal.facetFieldList)
     req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
     req.setParam("facet.limit", str(self.__portal.facetCount))
     req.setParam("sort", self.__sortBy)

     # setup facets
     if self.__useSessionNavigation:
         # Filters live in the session and are changed through form verbs
         action = self.formData.get("verb")
         value = self.formData.get("value")
         fq = self.sessionState.get("fq")
         if fq is not None:
             self.__pageNum = 1
             req.setParam("fq", fq)
         if action == "add_fq":
             self.__pageNum = 1
             req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
         elif action == "remove_fq":
             self.__pageNum = 1
             req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
         elif action == "clear_fq":
             self.__pageNum = 1
             req.removeParam("fq")
         elif action == "select-page":
             self.__pageNum = int(value)
     else:
         # Filters and page number are encoded in the URI after the page path
         navUri = uri[len(pagePath):]
         self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
         savedfq = self.sessionState.get("savedfq")
         limits = []
         if savedfq:
             limits.extend(savedfq)
         if fq:
             limits.extend(fq)
             self.sessionState.set("savedfq", limits)
             for q in fq:
                 req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

     portalQuery = self.__portal.query
     if portalQuery:
         req.addParam("fq", portalQuery)
     req.addParam("fq", 'item_type:"object"')
     if req.getParams("fq"):
         self.__selected = ArrayList(req.getParams("fq"))

     if self.__useSessionNavigation:
         self.sessionState.set("fq", self.__selected)
         self.sessionState.set("searchQuery", portalSearchQuery)
         self.sessionState.set("pageNum", self.__pageNum)

     # The security filter is added only after 'fq' has been stored in the
     # session above, so it is not part of the saved filters.
     if not self.page.authentication.is_admin():
         def escapePhrase(text):
             # Escape the characters that would end or alter a quoted phrase
             return text.replace("\\", "\\\\").replace('"', '\\"')

         current_user = escapePhrase(self.page.authentication.get_username())
         security_roles = [escapePhrase(role)
                           for role in self.page.authentication.get_roles_list()]
         security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
         security_exceptions = 'security_exception:"' + current_user + '"'
         owner_query = 'owner:"' + current_user + '"'
         security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
         req.addParam("fq", security_query)

     req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

     print(" * search.py: %s %s" % (req.toString(), self.__pageNum))

     out = ByteArrayOutputStream()
     self.services.indexer.search(req, out)
     self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
     self.__paging = Pagination(self.__pageNum,
                                int(self.__result.getNumFound()),
                                recordsPerPage)