コード例 #1
0
ファイル: lookup.py プロジェクト: greg-pendlebury/redbox
 def handleQuery(self, query, fieldName, formatStr):
     out = ByteArrayOutputStream()
     req = SearchRequest(query)
     req.setParam("fq", 'item_type:"object"')
     req.setParam("fq", 'workflow_id:"dataset"')
     req.setParam("rows", "1000")
     self.indexer.search(req, out)
     res = SolrResult(ByteArrayInputStream(out.toByteArray()))
     hits = HashSet()
     if (res.getNumFound() > 0):
         results = res.getResults()
         for searchRes in results:
             searchResList = searchRes.getList(fieldName)
             if (searchResList.isEmpty()==False):
                 for hit in searchResList:
                     if self.term is not None:
                         if hit.find(self.term) != -1:
                             hits.add(hit)
                     else:
                         hits.add(hit)
         self.writer.print("[")
         hitnum = 0
         for hit in hits:
             if (hitnum > 0):
                 self.writer.print(","+formatStr % {"hit":hit})
             else:    
                 self.writer.print(formatStr % {"hit":hit})
             hitnum += 1
         self.writer.print("]")
     else:   
          self.writer.println("[\"\"]")
     self.writer.close()
コード例 #2
0
ファイル: csv.py プロジェクト: andrewjanke/redbox
 def export(self, exportType):        
     exportQuery = "%s:%s" % (self.facetField, self.facetFieldValue)
     outputType = "text/%s; charset=UTF-8" % type        
     responseHeader = "attachment; filename=%s.%s" % (self.facetFieldValue, exportType) 
     
     try:
         out = ByteArrayOutputStream() 
         recnumreq = SearchRequest(exportQuery)
         recnumreq.setParam("fl","create_timestamp")
         recnumreq.setParam("rows", "0")
         self.indexer.search(recnumreq, out)
         recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
         self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
     except:
         self.errorMsg = "Export query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
         self.log.error("Export query threw an exception (package type was %s): %s - %s" % (self.facetFieldValue, sys.exc_info()[0], sys.exc_info()[1]))
         return
     
     out = ByteArrayOutputStream()
     req = SearchRequest(exportQuery)
     req.setParam("wt", exportType)        
     req.setParam("rows", self.__rowsFoundSolr)
     self.indexer.search(req, out)
     self.response.setHeader("Content-Disposition", responseHeader)
     writer = self.response.getPrintWriter(outputType)
     writer.println(out.toString("UTF-8"))
     writer.close()
コード例 #3
0
ファイル: lookup.py プロジェクト: nishen/redbox
 def handleWorkflowStep(self):
     out = ByteArrayOutputStream()
     req = SearchRequest("workflow_step_label:[* TO *]" )
     req.setParam("fq", 'item_type:"object"')
     req.setParam("fq", 'workflow_id:"dataset"')
     req.setParam("rows", "1000")
     self.indexer.search(req, out)
     res = SolrResult(ByteArrayInputStream(out.toByteArray()))
     hits = HashSet()
     if (res.getNumFound() > 0):
         recordTypeResults = res.getResults()
         for recordTypeResult in recordTypeResults:
             recordTypeList = recordTypeResult.getList("workflow_step_label")
             if (recordTypeList.isEmpty()==False):
                 for hit in recordTypeList:
                     hits.add(hit)
         self.writer.println("[")
         
         hitnum = 0
         for hit in hits:
             if (hitnum > 0):
                 self.writer.println(",{\"value\": \"%s\",\n\"label\": \"%s\"}" % (hit,hit))
             else:    
                 self.writer.println("{\"value\": \"%s\",\n\"label\": \"%s\"}" % (hit,hit))
             hitnum += 1
         self.writer.println("]")
     else:   
          self.writer.println("[\"\"]")
     self.writer.close()
コード例 #4
0
 def handleQuery(self, query, fieldName, formatStr):
     out = ByteArrayOutputStream()
     req = SearchRequest(query)
     req.setParam("fq", 'item_type:"object"')
     req.setParam("fq", 'workflow_id:"dataset"')
     req.setParam("rows", "1000")
     self.indexer.search(req, out)
     res = SolrResult(ByteArrayInputStream(out.toByteArray()))
     hits = HashSet()
     if (res.getNumFound() > 0):
         results = res.getResults()
         for searchRes in results:
             searchResList = searchRes.getList(fieldName)
             if (searchResList.isEmpty() == False):
                 for hit in searchResList:
                     if self.term is not None:
                         if hit.find(self.term) != -1:
                             hits.add(hit)
                     else:
                         hits.add(hit)
         self.writer.print("[")
         hitnum = 0
         for hit in hits:
             if (hitnum > 0):
                 self.writer.print("," + formatStr % {"hit": hit})
             else:
                 self.writer.print(formatStr % {"hit": hit})
             hitnum += 1
         self.writer.print("]")
     else:
         self.writer.println("[\"\"]")
     self.writer.close()
コード例 #5
0
ファイル: lookup.py プロジェクト: nishen/redbox
 def handleGrantNumber(self):
     out = ByteArrayOutputStream()
     req = SearchRequest("grant_numbers:%s*" % self.term)
     req.setParam("fq", 'item_type:"object"')
     req.setParam("fq", 'workflow_id:"dataset"')
     req.setParam("rows", "1000")
     self.indexer.search(req, out)
     res = SolrResult(ByteArrayInputStream(out.toByteArray()))
     hits = HashSet()
     if (res.getNumFound() > 0):
         creatorResults = res.getResults()
         for creatorRes in creatorResults:
             creatorList = creatorRes.getList("grant_numbers")
             if (creatorList.isEmpty()==False):
                 for hit in creatorList:
                     hits.add(hit)
         self.writer.print("[")
         hitnum = 0
         for hit in hits:
             if (hitnum > 0):
                 self.writer.print(",\"%s\"" % hit)
             else:    
                 self.writer.print("\"%s\"" % hit)
             hitnum += 1
         self.writer.print("]")
     else:   
          self.writer.println("[\"\"]")
     self.writer.close()
コード例 #6
0
 def _searchSets(self, startPage=1):
     req = SearchRequest(self.getQuery())
     req.setParam("fq", 'item_type:"object"')
     req.setParam("rows", str(self.getRecordsPerPage()))
     req.setParam("start", str((startPage - 1) * self.getRecordsPerPage()))
     req.addParam("fq", self.getFilterQuery())
     req.setParam("fl", self.getReturnFields())
     req.setParam("sort", "date_object_modified desc, f_dc_title asc")
     if not self.isAdmin():
         req.addParam("fq", self.getSecurityQuery())
     out = ByteArrayOutputStream()
     self.indexer.search(req, out)
     result = SolrResult(ByteArrayInputStream(out.toByteArray()))
     self._setPaging(result.getNumFound())
     result.getJsonObject().put("lastPage", str(self.paging.getLastPage()))
     result.getJsonObject().put("curPage", str(startPage))
     return result
コード例 #7
0
 def _searchSets(self, startPage=1):
     req = SearchRequest(self.getQuery())
     req.setParam("fq", 'item_type:"object"')
     req.setParam("rows", str(self.getRecordsPerPage()))
     req.setParam("start", str((startPage - 1) * self.getRecordsPerPage()))
     req.addParam("fq", self.getFilterQuery())
     req.setParam("fl", self.getReturnFields())
     req.setParam("sort", "date_object_modified desc, f_dc_title asc")
     if not self.isAdmin():
         req.addParam("fq", self.getSecurityQuery())
     out = ByteArrayOutputStream()
     self.indexer.search(req, out)
     result = SolrResult(ByteArrayInputStream(out.toByteArray()))
     self._setPaging(result.getNumFound())
     result.getJsonObject().put("lastPage", str(self.paging.getLastPage()))
     result.getJsonObject().put("curPage", str(startPage))
     return result
コード例 #8
0
class ReportsData:
    def __init__(self):
        pass

    def __activate__(self, context):
        #import pydevd;pydevd.settrace()
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.log = context["log"]
        self.__harvestList = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            print "ERROR: Requested context entry '" + index + "' doesn't exist"
            return None

    def __search(self):
        indexer = self.services.getIndexer()
        
        # Security prep work
        isAdmin = self.vc("page").authentication.is_admin()
        if not isAdmin:
            print "ERROR: User is not an admin '"
            return None

        req = SearchRequest('eventType:harvestStart')
        req.setParam("rows", "100")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__harvestList = SolrResult(ByteArrayInputStream(out.toByteArray()))

    
    def getHarvestlist(self):
        return self.__harvestList.getResults()

    def getItemCount(self):
        return self.__harvestList.getNumFound()
コード例 #9
0
class ReportsData:
    def __init__(self):
        pass

    def __activate__(self, context):
        #import pydevd;pydevd.settrace()
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.log = context["log"]
        self.__harvestList = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            print "ERROR: Requested context entry '" + index + "' doesn't exist"
            return None

    def __search(self):
        indexer = self.services.getIndexer()

        # Security prep work
        isAdmin = self.vc("page").authentication.is_admin()
        if not isAdmin:
            print "ERROR: User is not an admin '"
            return None

        req = SearchRequest('eventType:harvestStart')
        req.setParam("rows", "100")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__harvestList = SolrResult(ByteArrayInputStream(
            out.toByteArray()))

    def getHarvestlist(self):
        return self.__harvestList.getResults()

    def getItemCount(self):
        return self.__harvestList.getNumFound()
コード例 #10
0
class HomeData:
    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.__latest = None
        self.__steps = None
        self.__alerts = None
        self.__result = None
        self.__stages = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.velocityContext["log"].error(
                "ERROR: Requested context entry '{}' doesn't exist", index)
            return None

    def __search(self):
        indexer = Services.getIndexer()
        portalQuery = Services.getPortalManager().get(
            self.vc("portalId")).getQuery()
        portalSearchQuery = Services.getPortalManager().get(
            self.vc("portalId")).getSearchQuery()

        # Security prep work
        current_user = self.vc("page").authentication.get_username()
        security_roles = self.vc("page").authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(
            security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = self.vc("page").authentication.is_admin()

        req = SearchRequest("*:*")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.addParam("fq", "")
        req.setParam("rows", "0")
        req.setParam("facet", "true")
        req.setParam("facet.field", "workflow_step")
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        steps = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__steps = steps.getFacets().get("workflow_step")

        wfConfig = JsonSimple(
            FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
        jsonStageList = wfConfig.getJsonSimpleList(["stages"])
        stages = []
        for jsonStage in jsonStageList:
            wfStage = WorkflowStage(jsonStage, self.__steps)
            stages.append(wfStage)
        self.__stages = stages

        req = SearchRequest("*:*")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.addParam("fq", "")
        req.setParam("rows", "25")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req.addParam("fq", "workflow_step:%s" % stages[0].getName())
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__alerts = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req = SearchRequest(
            "last_modified:[NOW-1MONTH TO *] AND workflow_step:live")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc")
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))

        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        return self.__latest.getResults()

    def getAlerts(self):
        return self.__alerts.getResults()

    def getItemCount(self):
        return self.__result.getNumFound()

    def getStages(self):
        return self.__stages
コード例 #11
0
class SearchData:
    def __activate__(self, context):
        self.services = context["Services"]
        self.page = context["page"]
        self.formData = context["formData"]
        self.portalId = context["portalId"]
        self.sessionState = context["sessionState"]
        self.request = context["request"]
        self.pageName = context["pageName"]
        self.log = context["log"]

        self.__portal = context["page"].getPortal()
        self.__useSessionNavigation = self.__portal.getBoolean(
            True, ["portal", "use-session-navigation"])
        self.__result = None
        if self.__useSessionNavigation:
            self.__pageNum = self.sessionState.get("pageNum", 1)
        else:
            self.__pageNum = 1
        self.__selected = ArrayList()
        self.__fqParts = []
        self.__searchField = self.formData.get("searchField", "full_text")

        self.__sortField = self.formData.get("sort-field")
        self.__sortOrder = self.formData.get("sort-order")
        if not (self.__sortField or self.__sortOrder):
            # use form data not specified, check session
            sortField = self.__portal.sortFieldDefault or "score"
            sortOrder = self.__portal.sortFieldDefaultOrder or "desc"
            #print "f:%s,o:%s" % (sortField, sortOrder)
            self.__sortField = self.sessionState.get("sortField", sortField)
            self.__sortOrder = self.sessionState.get("sortOrder", sortOrder)
        self.sessionState.set("sortField", self.__sortField)
        self.sessionState.set("sortOrder", self.__sortOrder)
        self.__sortBy = "%s %s" % (self.__sortField, self.__sortOrder)

        # reset the query and facet selections when changing views
        lastPortalId = self.sessionState.get("lastPortalId")
        if lastPortalId != self.portalId:
            self.log.info(
                "Portal changed. Resetting saved search values in session.")
            self.sessionState.remove("fq")
            self.sessionState.remove("pageNum")
            self.sessionState.remove("sortField")
            self.sessionState.remove("sortOrder")
            self.__pageNum = 1
            self.sessionState.set("lastPortalId", self.portalId)

        self.__search()

    def usingSessionNavigation(self):
        return self.__restful

    def getPortalName(self):
        return self.__portal.getDescription()

    def getSearchField(self):
        return self.__searchField

    def __search(self):
        requireEscape = False
        recordsPerPage = self.__portal.recordsPerPage
        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        query = None
        pagePath = self.__portal.getName() + "/" + self.pageName
        if query is None or query == "":
            query = self.formData.get("query")
            requireEscape = True
        if query is None or query == "":
            query = "*:*"

        if query == "*:*":
            self.__query = ""
        else:
            self.__query = query
            if requireEscape:
                query = self.__escapeQuery(query)
            query = "%s:%s" % (self.__searchField, query)
        self.sessionState.set("query", self.__query)

        # find objects with annotations matching the query
        if query != "*:*":
            anotarQuery = self.__query
            if requireEscape:
                anotarQuery = self.__escapeQuery(anotarQuery)
            annoReq = SearchRequest(anotarQuery)
            annoReq.setParam("facet", "false")
            annoReq.setParam("rows", str(99999))
            annoReq.setParam("sort", "dateCreated asc")
            annoReq.setParam("start", str(0))
            anotarOut = ByteArrayOutputStream()
            self.services.indexer.annotateSearch(annoReq, anotarOut)
            resultForAnotar = SolrResult(
                ByteArrayInputStream(anotarOut.toByteArray()))
            resultForAnotar = resultForAnotar.getResults()
            ids = HashSet()
            for annoDoc in resultForAnotar:
                annotatesUri = annoDoc.getFirst("annotatesUri")
                ids.add(annotatesUri)
                self.log.debug("Found annotation for %s" % annotatesUri)
            # add annotation ids to query
            query += ' OR id:("' + '" OR "'.join(ids) + '")'

        portalSearchQuery = self.__portal.searchQuery
        if portalSearchQuery == "":
            portalSearchQuery = query
        else:
            if query != "*:*":
                query += " AND " + portalSearchQuery
            else:
                query = portalSearchQuery

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", self.__portal.facetFieldList)
        req.setParam("facet.sort",
                     Boolean.toString(self.__portal.getFacetSort()))
        req.setParam("facet.limit", str(self.__portal.facetCount))
        req.setParam("sort", self.__sortBy)

        # setup facets
        if self.__useSessionNavigation:
            action = self.formData.get("verb")
            value = self.formData.get("value")
            fq = self.sessionState.get("fq")
            if fq is not None:
                self.__pageNum = 1
                req.setParam("fq", fq)
            if action == "add_fq":
                self.__pageNum = 1
                req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "remove_fq":
                self.__pageNum = 1
                req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "clear_fq":
                self.__pageNum = 1
                req.removeParam("fq")
            elif action == "select-page":
                self.__pageNum = int(value)
        else:
            navUri = uri[len(pagePath):]
            self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
            savedfq = self.sessionState.get("savedfq")
            limits = []
            if savedfq:
                limits.extend(savedfq)
            if fq:
                limits.extend(fq)
                self.sessionState.set("savedfq", limits)
                for q in fq:
                    req.addParam("fq", URLDecoder.decode(q, "UTF-8"))

        portalQuery = self.__portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", 'item_type:"object"')
        if req.getParams("fq"):
            self.__selected = ArrayList(req.getParams("fq"))

        if self.__useSessionNavigation:
            self.sessionState.set("fq", self.__selected)
            self.sessionState.set("searchQuery", portalSearchQuery)
            self.sessionState.set("pageNum", self.__pageNum)

        # Make sure 'fq' has already been set in the session
        if not self.page.authentication.is_admin():
            current_user = self.page.authentication.get_username()
            security_roles = self.page.authentication.get_roles_list()
            security_filter = 'security_filter:("' + '" OR "'.join(
                security_roles) + '")'
            security_exceptions = 'security_exception:"' + current_user + '"'
            owner_query = 'owner:"' + current_user + '"'
            security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
            req.addParam("fq", security_query)
        ## uncomment to ensure guest users not logged in cannot see alerts in browse page
        # self.filterOutWorkflowStepForUnAuth(req, "inbox")

        req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))

        self.log.debug(" * search.py: %s, page: %s" %
                       (req.toString(), self.__pageNum))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
        if self.__result is not None:
            self.__paging = Pagination(self.__pageNum,
                                       self.__result.getNumFound(),
                                       self.__portal.recordsPerPage)

    def filterOutWorkflowStepForUnAuth(self, req, workflow_step):
        if self.page.authentication.get_username(
        ) == 'guest' and not self.page.authentication.is_logged_in():
            step_filter = [workflow_step]
            no_alerts_for_non_auth = ' - workflow_step:("' + ''.join(
                step_filter) + '")'
            req.addParam("fq", no_alerts_for_non_auth)

    def __escapeQuery(self, q):
        temp = ""
        chars = "+-&|!(){}[]^\"~*?:\\"
        for c in q:
            if c in chars:
                temp += "\%s" % c
            else:
                temp += c

        return temp

#        eq = q
#        # escape all solr/lucene special chars
#        # from http://lucene.apache.org/java/2_4_0/queryparsersyntax.html#Escaping%20Special%20Characters
#        for c in "+-&|!(){}[]^\"~*?:\\":
#            eq = eq.replace(c, "\\%s" % c)
#        ## Escape UTF8
#        try:
#            return URLEncoder.encode(eq, "UTF-8")
#        except UnsupportedEncodingException, e:
#            print "Error during UTF8 escape! ", repr(eq)
#            return eq

    def getQueryTime(self):
        return int(self.__result.getQueryTime()) / 1000.0

    def getPaging(self):
        return self.__paging

    def getResult(self):
        return self.__result

    def getFacetField(self, key):
        return self.__portal.facetFields.get(key)

    def getFacetName(self, key):
        return self.__portal.facetFields.get(key).getString(None, ["label"])

    def getFacetCounts(self, key):
        if self.__useSessionNavigation:
            facetData = self.__result.getFacets()
            if facetData is None:
                return LinkedHashMap()
            if not facetData.containsKey(key):
                return LinkedHashMap()
            return facetData.get(key).values()
        else:
            return LinkedHashMap()
        # TODO : What were these doing? Hiding file path facets unless some facets are selected?
        #if name.find("/") == -1 or self.hasSelectedFacets():
        #    values.put(name, count)

    def getFacetDisplay(self):
        return self.__portal.facetDisplay

    def hasSelectedFacets(self):
        return (self.__selected is not None and len(self.__selected) > 1) and \
            not (self.__portal.query in self.__selected and len(self.__selected) == 2)

    def getSelectedFacets(self):
        return self.__selected

    def isPortalQueryFacet(self, fq):
        return fq == self.__portal.query

    def isSelected(self, fq):
        return fq in self.__selected

    def getSelectedFacetIds(self):
        ## Returns a native Python array
        ## Python has indicator of u' for unicode string which causes JS complain.
        ## Because MD5 hash is made from letters we can encode them in ascii
        return [
            DigestUtils.md5Hex(fq).encode("ascii", "ignore")
            for fq in self.__selected
        ]

    def getFileName(self, path):
        return os.path.splitext(os.path.basename(path))[0]

    def getFacetQuery(self, name, value):
        return '%s:"%s"' % (name, value)

    # Packaging support
    def getActiveManifestTitle(self):
        return self.__getActiveManifest().getTitle()

    def getActiveManifestId(self):
        return self.sessionState.get("package/active/id")

    def getSelectedItemsCount(self):
        return self.__getActiveManifest().size()

    def isSelectedForPackage(self, oid):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        return (result is not None)

    def getManifestItemTitle(self, oid, defaultValue):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        if result is None:
            return defaultValue
        return result.getTitle()

    def __getActiveManifest(self):
        activeManifest = self.sessionState.get("package/active")
        if not activeManifest:
            activeManifest = Manifest(None)
            activeManifest.setTitle("New package")
            activeManifest.setViewId(self.__portal.getName())
            self.sessionState.set("package/active", activeManifest)
        return activeManifest

    def isSelectableForPackage(self, oid):
        return oid != self.getActiveManifestId()

    def getSortFields(self):
        return self.__portal.sortFields

    def getSortField(self):
        return self.__sortField

    def getSortOrder(self):
        return self.__sortOrder

    # RESTful style URL support methods
    def getPageQuery(self, page):
        prefix = ""
        if self.__fqParts:
            prefix = "/" + "/".join(self.__fqParts)
        suffix = ""
        if page > 1:
            suffix = "/page/%s" % page
        return prefix + suffix

    def getFacetQueryUri(self, name, value):
        return "%s/%s" % (name, value)

    def getFacetValue(self, facetValue):
        return facetValue.split("/")[-1]

    def getLimitQueryWith(self, fq):
        limits = ArrayList(self.__fqParts)
        limits.add("category/" + fq)
        return "/".join(limits)

    def getFacetIndent(self, facetValue):
        return len(facetValue.split("/"))

    def getLimitQueryWithout(self, fq):
        limits = ArrayList(self.__fqParts)
        limits.remove("category/" + fq)
        if limits.isEmpty():
            return ""
        return "/".join(limits)

    def __parseUri(self, uri):
        page = 1
        fq = []
        fqParts = []
        if uri != "":
            parts = uri.split("/")
            partType = None
            facetKey = None
            facetValues = None
            for part in parts:
                if partType == "page":
                    facetKey = None
                    page = int(part)
                elif partType == "category":
                    partType = "category-value"
                    facetValues = None
                    facetKey = part
                elif partType == "category-value":
                    if facetValues is None:
                        facetValues = []
                    if part in ["page", "category"]:
                        partType = part
                        facetQuery = '%s:"%s"' % (facetKey,
                                                  "/".join(facetValues))
                        fq.append(facetQuery)
                        fqParts.append("category/%s/%s" %
                                       (facetKey, "/".join(facetValues)))
                        facetKey = None
                        facetValues = None
                    else:
                        facetValues.append(URLDecoder.decode(part))
                else:
                    partType = part
            if partType == "category-value":
                facetQuery = '%s:"%s"' % (facetKey, "/".join(facetValues))
                fq.append(facetQuery)
                fqParts.append("category/%s/%s" %
                               (facetKey, "/".join(facetValues)))
        return page, fq, fqParts
コード例 #12
0
class HomeData:
    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None
        self.__steps = None
        self.__selfservicesStages = None
        self.__search()

    # Get from velocity context Mint version
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            print "ERROR: Requested context entry '" + index + "' doesn't exist"
            return None

    def __search(self):
        indexer = self.services.getIndexer()
        portalQuery = self.services.getPortalManager().get(self.vc("portalId")).getQuery()
        portalSearchQuery = self.services.getPortalManager().get(self.vc("portalId")).getSearchQuery()
        
        # Security prep work
        current_user = self.vc("page").authentication.get_username()
        security_roles = self.vc("page").authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = self.vc("page").authentication.is_admin()

        req = SearchRequest("last_modified:[NOW-1MONTH TO *]")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))
        
        req = SearchRequest(owner_query)
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__mine = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req = SearchRequest('workflow_security:"' + current_user + '"')
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__workflows = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req = SearchRequest("*:*")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.addParam("fq", "")
        req.setParam("rows", "0")
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        
        self.vc("sessionState").set("fq", 'item_type:"object"')
        #sessionState.set("query", portalQuery.replace("\"", "'"))
        
        # Load in the services UI workflow
        selfSubmitWfConfig = JsonSimple(FascinatorHome.getPathFile("harvest/workflows/servicesUI.json"))
        selfSubmitJsonStageList = selfSubmitWfConfig.getJsonSimpleList(["stages"])
        servicesStages = []
        for jsonStage in selfSubmitJsonStageList:
            wfStage = WorkflowStage(jsonStage, self.__steps)
            servicesStages.append(wfStage)
        self.__selfservicesStages = servicesStages
        
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
    
    def getLatest(self):
        return self.__latest.getResults()
    
    def getMine(self):
        return self.__mine.getResults()

    def getWorkflows(self):
        return self.__workflows.getResults()

    def getItemCount(self):
        return self.__result.getNumFound()

    def getServicesStages(self):
        return self.__servicesStages
コード例 #13
0
ファイル: home.py プロジェクト: greg-pendlebury/redbox
class HomeData:
    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.__latest = None
        self.__steps = None
        self.__alerts = None
        self.__result = None
        self.__stages = None
        self.__embargoes = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.velocityContext["log"].error("ERROR: Requested context entry '{}' doesn't exist", index)
            return None

    def __search(self):
        indexer = Services.getIndexer()
        portalQuery = Services.getPortalManager().get(self.vc("portalId")).getQuery()
        portalSearchQuery = Services.getPortalManager().get(self.vc("portalId")).getSearchQuery()

        # Security prep work
        current_user = self.vc("page").authentication.get_username()
        security_roles = self.vc("page").authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = self.vc("page").authentication.is_admin()

        req = SearchRequest("*:*")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.addParam("fq", "")
        req.setParam("rows", "0")
        req.setParam("facet", "true")
        req.setParam("facet.field", "workflow_step")
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        steps = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.__steps = steps.getFacets().get("workflow_step")

        wfConfig = JsonSimple(FascinatorHome.getPathFile("harvest/workflows/dataset.json"))
        jsonStageList = wfConfig.getJsonSimpleList(["stages"])
        stages = []
        for jsonStage in jsonStageList:
            wfStage = WorkflowStage(jsonStage, self.__steps)
            stages.append(wfStage)
        self.__stages = stages

        req = SearchRequest("*:*")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.addParam("fq", "")
        req.setParam("rows", "25")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req.addParam("fq", "workflow_step:%s" % stages[0].getName())
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__alerts = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req = SearchRequest("last_modified:[NOW-1MONTH TO *] AND workflow_step:live")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self._searchEmbargoes()
        self.vc("sessionState").set("fq", 'item_type:"object"')

    def getLatest(self):
        return self.__latest.getResults()

    def getAlerts(self):
        return self.__alerts.getResults()

    def getItemCount(self):
        return self.__result.getNumFound()

    def getStages(self):
        return self.__stages
        
    def getEmbargoes(self):
		return self.__embargoes.getResults()
		
    def _searchEmbargoes(self):
        req = SearchRequest("item_type:object")
        req.setParam("fq", 'redbox\:embargo.redbox\:isEmbargoed:on')
        req.addParam("fq", 'workflow_step:final-review')
        req.addParam("fq", "")
        req.setParam("fl","id,date_embargoed,dc_title")
        req.setParam("rows", "25")
        req.setParam("sort", "date_embargoed asc, dc_title asc");

        out = ByteArrayOutputStream()
        indexer = Services.getIndexer()
        indexer.search(req, out)
        self.__embargoes = SolrResult(ByteArrayInputStream(out.toByteArray()))
        self.velocityContext["log"].info("searchEmbargoes call ended" + str(self.__embargoes))

    def formatDate(self, date):    
        dfSource = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
        dfTarget = SimpleDateFormat("dd/MM/yyyy")
        return dfTarget.format(dfSource.parse(date))
コード例 #14
0
ファイル: oai.py プロジェクト: kiranba/the-fascinator
class OaiData:
    def __init__(self):
        self.tokensDB = None

    def __activate__(self, context):
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.lastPage = False

        # Check if the OAI request has an overriding portal ('set') to the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")
        if resumptionToken is not None:
            # Split out the start component from the actual resumption token
            (resumptionTokenPart, start) = resumptionToken.strip().split(":")
            # This could still be be null
            self.__currentToken = self.tokensDB.getToken(resumptionTokenPart)
            # Code to handle null token is handled later on
            if self.__currentToken is not None:
                self.__currentToken.setStart(start)

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB, self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument", "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        #  data (like a search result) do so now. Everything else can be
        #  handled in the templates.
        if self.getError() is None and self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:

            # Find the metadata prefix requested
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            if self.__metadataPrefix is None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            # Only list records if the metadata format is enabled in this view
            if self.isInView(self.__metadataPrefix):
                self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None

    def isInView(self, format, view=None):
        # Sanity check
        if format is None or format == "":
            return False
        # Default to current poral
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False
        # Is it visible everywhere?
        allViews = formatConfig.getBoolean(False, ["enabledInAllViews"])
        if allViews:
            return True
        # Check if it is visible in this view
        else:
            allowedViews = formatConfig.getStringList(["enabledViews"])
            if view in allowedViews:
                return True
        # Rejection
        return False

    def getID(self, item):
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator.usq.edu.au:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        set = item.getFirst("oai_set")
        # Fallback to the portal name
        if set is None or set == "":
            return self.__portalName
        # Use the required set
        return set

    def getVerb(self):
        return self.getRequest().getVerb()

    def getError(self):
        return self.getRequest().getError()

    def getResponseDate(self):
        return time.strftime("%Y-%m-%dT%H:%M:%SZ")

    def getRequest(self):
        return self.__request

    def getResult(self):
        return self.__result

    def getElement(self, elementName, values):
        elementStr = ""
        if values:
            for value in values:
                elementStr += "<%s>%s</%s>" % (elementName, value, elementName)
        return elementStr

    def __search(self):
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage

        # Resolve our identifier
        id = self.vc("formData").get("identifier")
        query = "*:*"
        if id is not None and id != "":
            # A default TF2 OID
            if id.startswith("oai:fascinator.usq.edu.au:"):
                idString = id.replace("oai:fascinator.usq.edu.au:", "")
                idString = self.__escapeQuery(idString)
                query = "id:" + idString
            # Or a custom OAI ID
            else:
                idString = self.__escapeQuery(id)
                query = "oai_identifier:" + idString

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Date data... is supplied
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilStr)
                queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            else:
                queryStr = "last_modified:[%s TO *]" % (fromStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)
        else:
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilDate.isoformat())
                queryStr = "last_modified:[* TO %s]" % (untilStr)
                self.log.debug("Date query: '{}'", queryStr)
                req.addParam("fq", queryStr)

        # Check if there's resumption token exist in the formData
        newToken = None
        if self.__currentToken is not None:
            start = int(self.__currentToken.getStart())
            totalFound = int(self.__currentToken.getTotalFound())
            nextTokenStart = start + recordsPerPage
            if nextTokenStart < totalFound:
                newToken = self.__currentToken
                newToken.resetExpiry(self.__sessionExpiry)
                newToken.setStart(nextTokenStart)
        # or start a new resumption token
        else:
            start = 0
            newToken = ResumptionToken(None, recordsPerPage, self.__metadataPrefix, self.__sessionExpiry)

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        if totalFound == 0:
            newToken = None
            # If an ID was requested, and not found, this is an error
            if id is not None and id != "":
                self.__request.setError("idDoesNotExist", "ID: '%s' not found" % id)
            else:
                self.__request.setError("noRecordsMatch", "No records match this request")

        # We need to store this for NEW tokens
        elif self.__currentToken is None:
            # Assuming there are enough results to even keep the token
            if newToken.getStart() < totalFound:
                newToken.setTotalFound(totalFound)
            else:
                newToken = None
        # Check if we need to remove the resumption token
        else:
            if (start + recordsPerPage) >= totalFound:
                self.tokensDB.removeToken(self.__currentToken)
                self.lastPage = True

        # Store/update the resumption token
        if newToken is not None:
            # Brand new token
            if self.__currentToken is None:
                self.tokensDB.storeToken(newToken)
            # Or update an old token
            else:
                self.tokensDB.updateToken(newToken)
            self.__currentToken = newToken

    def getToken(self):
        if self.isInView(self.__metadataPrefix) and not self.lastPage:
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        # First get the Object from storage
        object = None
        try:
            object = self.services.getStorage().getObject(oid)
        except StorageException, e:
            return None

        # Check whether the payload exists
        try:
            return object.getPayload(metadataFileName)
        except StorageException, e:
            return None
コード例 #15
0
ファイル: search.py プロジェクト: kiranba/the-fascinator
class SearchData:
    def __activate__(self, context):
        self.services = context["Services"]
        self.page = context["page"]
        self.formData = context["formData"]
        self.portalId = context["portalId"]
        self.sessionState = context["sessionState"]
        self.request = context["request"]
        self.pageName = context["pageName"]
        self.log = context["log"]

        self.__portal = context["page"].getPortal()
        self.__useSessionNavigation = self.__portal.getBoolean(True, ["portal", "use-session-navigation"])
        self.__result = None
        if self.__useSessionNavigation:
            self.__pageNum = self.sessionState.get("pageNum", 1)
        else:
            self.__pageNum = 1
        self.__selected = ArrayList()
        self.__fqParts = []
        self.__searchField = self.formData.get("searchField", "full_text")
        
        self.__sortField = self.formData.get("sort-field")
        self.__sortOrder = self.formData.get("sort-order")
        if not (self.__sortField or self.__sortOrder):
            # use form data not specified, check session
            sortField = self.__portal.sortFieldDefault or "score"
            sortOrder = self.__portal.sortFieldDefaultOrder or "desc"
            #print "f:%s,o:%s" % (sortField, sortOrder)
            self.__sortField = self.sessionState.get("sortField", sortField)
            self.__sortOrder = self.sessionState.get("sortOrder", sortOrder)
        self.sessionState.set("sortField", self.__sortField)
        self.sessionState.set("sortOrder", self.__sortOrder)
        self.__sortBy = "%s %s" % (self.__sortField, self.__sortOrder)
        
        # reset the query and facet selections when changing views
        lastPortalId = self.sessionState.get("lastPortalId")
        if lastPortalId != self.portalId:
            self.sessionState.remove("fq")
            self.sessionState.remove("pageNum")
            self.sessionState.remove("sortField")
            self.sessionState.remove("sortOrder")
            self.__pageNum = 1
            self.sessionState.set("lastPortalId", self.portalId)
        
        self.__search()

    def usingSessionNavigation(self):
        return self.__restful
    
    def getPortalName(self):
        return self.__portal.getDescription()
    
    def getSearchField(self):
        return self.__searchField
    
    def __search(self):
        requireEscape = False
        recordsPerPage = self.__portal.recordsPerPage
        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        query = None
        pagePath = self.__portal.getName() + "/" + self.pageName
        if query is None or query == "":
            query = self.formData.get("query")
            requireEscape = True
        if query is None or query == "":
            query = "*:*"
        
        if query == "*:*":
            self.__query = ""
        else:
            self.__query = query
            if requireEscape:
                query = self.__escapeQuery(query)
            query = "%s:%s" % (self.__searchField, query)
        self.sessionState.set("query", self.__query)
        
        # find objects with annotations matching the query
        if query != "*:*":
            anotarQuery = self.__query
            if requireEscape:
                anotarQuery = self.__escapeQuery(anotarQuery)
            annoReq = SearchRequest(anotarQuery)
            annoReq.setParam("facet", "false")
            annoReq.setParam("rows", str(99999))
            annoReq.setParam("sort", "dateCreated asc")
            annoReq.setParam("start", str(0))
            anotarOut = ByteArrayOutputStream()
            self.services.indexer.annotateSearch(annoReq, anotarOut)
            resultForAnotar = SolrResult(ByteArrayInputStream(anotarOut.toByteArray()))
            resultForAnotar = resultForAnotar.getResults()
            ids = HashSet()
            for annoDoc in resultForAnotar:
                annotatesUri = annoDoc.getFirst("annotatesUri")
                ids.add(annotatesUri)
                self.log.debug("Found annotation for %s" % annotatesUri)
            # add annotation ids to query
            query += ' OR id:("' + '" OR "'.join(ids) + '")'
        
        portalSearchQuery = self.__portal.searchQuery
        if portalSearchQuery == "":
            portalSearchQuery = query
        else:
            if query != "*:*":
                query += " AND " + portalSearchQuery
            else:
                query = portalSearchQuery
        
        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", self.__portal.facetFieldList)
        req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort()))
        req.setParam("facet.limit", str(self.__portal.facetCount))
        req.setParam("sort", self.__sortBy)
        
        # setup facets
        if self.__useSessionNavigation:
            action = self.formData.get("verb")
            value = self.formData.get("value")
            fq = self.sessionState.get("fq")
            if fq is not None:
                self.__pageNum = 1
                req.setParam("fq", fq)
            if action == "add_fq":
                self.__pageNum = 1
                req.addParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "remove_fq":
                self.__pageNum = 1
                req.removeParam("fq", URLDecoder.decode(value, "UTF-8"))
            elif action == "clear_fq":
                self.__pageNum = 1
                req.removeParam("fq")
            elif action == "select-page":
                self.__pageNum = int(value)
        else:
            navUri = uri[len(pagePath):]
            self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri)
            savedfq = self.sessionState.get("savedfq")
            limits = []
            if savedfq:
                limits.extend(savedfq)
            if fq:
                limits.extend(fq)
                self.sessionState.set("savedfq", limits)
                for q in fq:
                    req.addParam("fq", URLDecoder.decode(q, "UTF-8"))
        
        portalQuery = self.__portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", 'item_type:"object"')
        if req.getParams("fq"):
            self.__selected = ArrayList(req.getParams("fq"))
        
        if self.__useSessionNavigation:
            self.sessionState.set("fq", self.__selected)
            self.sessionState.set("searchQuery", portalSearchQuery)
            self.sessionState.set("pageNum", self.__pageNum)
        
        # Make sure 'fq' has already been set in the session
        if not self.page.authentication.is_admin():
            current_user = self.page.authentication.get_username()
            security_roles = self.page.authentication.get_roles_list()
            security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
            security_exceptions = 'security_exception:"' + current_user + '"'
            owner_query = 'owner:"' + current_user + '"'
            security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
            req.addParam("fq", security_query)
        
        req.setParam("start", str((self.__pageNum - 1) * recordsPerPage))
        
        #print " * search.py:", req.toString(), self.__pageNum
        
        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
        if self.__result is not None:
            self.__paging = Pagination(self.__pageNum,
                                       self.__result.getNumFound(),
                                       self.__portal.recordsPerPage)
    
    def __escapeQuery(self, q):
        temp = ""
        chars = "+-&|!(){}[]^\"~*?:\\"
        for c in q:
           if c in chars:
             temp += "\%s" % c
           else:
             temp += c
        
        return temp 
        
#        eq = q
#        # escape all solr/lucene special chars
#        # from http://lucene.apache.org/java/2_4_0/queryparsersyntax.html#Escaping%20Special%20Characters
#        for c in "+-&|!(){}[]^\"~*?:\\":
#            eq = eq.replace(c, "\\%s" % c)
#        ## Escape UTF8
#        try:
#            return URLEncoder.encode(eq, "UTF-8")
#        except UnsupportedEncodingException, e:
#            print "Error during UTF8 escape! ", repr(eq)
#            return eq
    
    def getQueryTime(self):
        return int(self.__result.getQueryTime()) / 1000.0;
    
    def getPaging(self):
        return self.__paging
    
    def getResult(self):
        return self.__result
    
    def getFacetField(self, key):
        return self.__portal.facetFields.get(key)
    
    def getFacetName(self, key):
        return self.__portal.facetFields.get(key).getString(None, ["label"])
    
    def getFacetCounts(self, key):
        if self.__useSessionNavigation:
            facetData = self.__result.getFacets()
            if facetData is None:
                return LinkedHashMap()
            if not facetData.containsKey(key):
                return LinkedHashMap()
            return facetData.get(key).values()
        else:
            return LinkedHashMap()
        # TODO : What were these doing? Hiding file path facets unless some facets are selected?
        #if name.find("/") == -1 or self.hasSelectedFacets():
        #    values.put(name, count)
    
    def getFacetDisplay(self):
        return self.__portal.facetDisplay
    
    def hasSelectedFacets(self):
        return (self.__selected is not None and len(self.__selected) > 1) and \
            not (self.__portal.query in self.__selected and len(self.__selected) == 2)
    
    def getSelectedFacets(self):
        return self.__selected
    
    def isPortalQueryFacet(self, fq):
        return fq == self.__portal.query
    
    def isSelected(self, fq):
        return fq in self.__selected
    
    def getSelectedFacetIds(self):
        return [md5.new(fq).hexdigest() for fq in self.__selected]
    
    def getFileName(self, path):
        return os.path.splitext(os.path.basename(path))[0]
    
    def getFacetQuery(self, name, value):
        return '%s:"%s"' % (name, value)
    
    # Packaging support
    def getActiveManifestTitle(self):
        return self.__getActiveManifest().getTitle()
    
    def getActiveManifestId(self):
        return self.sessionState.get("package/active/id")
    
    def getSelectedItemsCount(self):
        return self.__getActiveManifest().size()
    
    def isSelectedForPackage(self, oid):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        return (result is not None)
    
    def getManifestItemTitle(self, oid, defaultValue):
        result = self.__getActiveManifest().getNode("node-%s" % oid)
        if result is None:
            return defaultValue
        return result.getTitle()
    
    def __getActiveManifest(self):
        activeManifest = self.sessionState.get("package/active")
        if not activeManifest:
            activeManifest = Manifest(None)
            activeManifest.setTitle("New package")
            activeManifest.setViewId(self.__portal.getName())
            self.sessionState.set("package/active", activeManifest)
        return activeManifest

    def isSelectableForPackage(self, oid):
        return oid != self.getActiveManifestId()
    
    def getSortFields(self):
        return self.__portal.sortFields
    
    def getSortField(self):
        return self.__sortField
    
    def getSortOrder(self):
        return self.__sortOrder
    
    # RESTful style URL support methods
    def getPageQuery(self, page):
        prefix = ""
        if self.__fqParts:
            prefix = "/" + "/".join(self.__fqParts)
        suffix = ""
        if page > 1:
            suffix = "/page/%s" % page
        return prefix + suffix
    
    def getFacetQueryUri(self, name, value):
        return "%s/%s" % (name, value)
    
    def getFacetValue(self, facetValue):
        return facetValue.split("/")[-1]
    
    def getLimitQueryWith(self, fq):
        limits = ArrayList(self.__fqParts)
        limits.add("category/" + fq)
        return "/".join(limits)
    
    def getFacetIndent(self, facetValue):
        return len(facetValue.split("/"))
    
    def getLimitQueryWithout(self, fq):
        limits = ArrayList(self.__fqParts)
        limits.remove("category/" + fq)
        if limits.isEmpty():
            return ""
        return "/".join(limits)
    
    def __parseUri(self, uri):
        page = 1
        fq = []
        fqParts = []
        if uri != "":
            parts = uri.split("/")
            partType = None
            facetKey = None
            facetValues = None
            for part in parts:
                if partType == "page":
                    facetKey = None
                    page = int(part)
                elif partType == "category":
                    partType = "category-value"
                    facetValues = None
                    facetKey = part
                elif partType == "category-value":
                    if facetValues is None:
                        facetValues = []
                    if part in ["page", "category"]:
                        partType = part
                        facetQuery = '%s:"%s"' % (facetKey, "/".join(facetValues))
                        fq.append(facetQuery)
                        fqParts.append("category/%s/%s" % (facetKey, "/".join(facetValues)))
                        facetKey = None
                        facetValues = None
                    else:
                        facetValues.append(URLDecoder.decode(part))
                else:
                    partType = part
            if partType == "category-value":
                facetQuery = '%s:"%s"' % (facetKey, "/".join(facetValues))
                fq.append(facetQuery)
                fqParts.append("category/%s/%s" % (facetKey, "/".join(facetValues)))
        return page, fq, fqParts
コード例 #16
0
    def __reportSearch(self):
        self.reportId = self.request.getParameter("id")
        self.format = self.request.getParameter("format")
        self.report = self.reportManager.getReports().get(self.reportId)
        self.reportQuery = self.report.getQueryAsString()
        self.log.debug("Report query: " + self.reportQuery)
        
        #Get a total number of records
        try:
            out = ByteArrayOutputStream() 
            recnumreq = SearchRequest(self.reportQuery)
            recnumreq.setParam("rows", "0")
            self.indexer.search(recnumreq, out)
            recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
            self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
        except:
            self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
            self.log.error("Reporting threw an exception (report was %s): %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return
        
        #Setup the main query
        req = SearchRequest(self.reportQuery)
        req.setParam("fq", 'item_type:"object"')
        req.setParam("fq", 'workflow_id:"dataset"')
        req.setParam("rows", self.__rowsFoundSolr)
        try:                
            #Now do the master search
            out = ByteArrayOutputStream()
            self.indexer.search(req, out)
            self.__reportResult = SolrResult(ByteArrayInputStream(out.toByteArray()))
            self.__checkResults()
        except:
            self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
            self.log.error("Reporting threw an exception (report was %s): %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return
        
        #At this point the display template has enough to go with.
        #We just need to handle the CSV now
        if (self.format == "csv"):
            #Setup the main query - we need to requery to make sure we return 
            #only the required fields. We'll use the specific IDs that met the
            #__checkResults check
            req = SearchRequest(self.reportQuery)
            req.setParam("fq", 'item_type:"object"')
            req.setParam("fq", 'workflow_id:"dataset"')
            req.setParam("rows", self.__rowsFoundSolr)
            req.setParam("csv.mv.separator",";")
            
            #we need to get a list of the matching IDs from Solr
            #this doesn't work for long queries so it's abandoned
            #but left here commented to make sure we don't try it again
            #idQry = ""
            #for item in self.getProcessedResultsList():
            #    idQry += item.get("id") + " OR "
            #req.setParam("fq", 'id:(%s)' % idQry[:len(idQry)-4])
            
            #Create a list of IDs for reference when preparing the CSV
            idQryList = []
            for item in self.getProcessedResultsList():
                idQryList.append(item.get("id"))
            
            #Setup SOLR query with the required fields
            self.fields = self.systemConfig.getArray("redbox-reports","csv-output-fields")
            #We must have an ID field and it must be the first field
            fieldString = "id,"
            if self.fields is not None:                
                for field in self.fields:
                    fieldString = fieldString+ field.get("field-name")+","
                fieldString = fieldString[:-1]
                
            req.setParam("fl",fieldString)
            
            out = ByteArrayOutputStream()
            try:
                self.indexer.search(req, out, self.format)
            except:
                #We can't get the result back from SOLR so fail back to the template display
                self.errorMsg = "Query failure. Failed to load the data - this issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
                self.log.error("Reporting threw an exception (report was %s); Error: %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
                return
            try:
                csvResponseString = String(out.toByteArray(),"utf-8")
                csvResponseLines = csvResponseString.split("\n")
            except:
                #We can't get the result back from SOLR so fail back to the template display
                self.errorMsg = "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s)." % (sys.exc_info()[0], sys.exc_info()[1])
                self.log.error("Reporting threw an exception (report was %s); Error: %s - %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
                return
            
            fileName = self.urlEncode(self.report.getLabel())
            self.log.debug("Generating CSV report with file name: " + fileName)
            self.response.setHeader("Content-Disposition", "attachment; filename=%s.csv" % fileName)
            
            sw = StringWriter()
            parser = CSVParser()
            writer = CSVWriter(sw)
            count = 0
            
            prevLine = ""
            badRowFlag = False
            
            for line in csvResponseLines:
                if badRowFlag:
                    #In this section of code we'll handle errors by either trying to fix the problem
                    #or by adding an error line in the CSV. We'll then move to the next row and keep going
                    try:
                        self.log.debug("Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s" % prevLine + line)
                        csvLine = parser.parseLine(prevLine + line)
                        badRowFlag = False
                        prevLine = ""
                        self.log.debug("Reporting - remedy appears to have worked. Line appears as: %s" % prevLine + line)
                    except:
                        #We tried to rescue the file but failed on the second run so give up
                        writer.writeNext(["Failed to transfer record to CSV - check logs"])
                        self.log.error("Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], prevLine + line))
                else:
                    try: 
                        csvLine = parser.parseLine(line)
                        badRowFlag = False
                        prevLine = ""
                    except: 
                        #This can happen if there's a newline in the index data 
                        #so we raise the badRowFlag and see if we can join this
                        #row to the next one to fix it
                        self.log.debug("Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s" % (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1], line))
                        badRowFlag = True
                        prevLine = line
                        continue
                
                if count == 0 :
                    #Header row
                    count += 1
                    for idx, csvValue in enumerate(csvLine):
                        csvLine[idx] = self.findDisplayLabel(csvValue)  
                elif csvLine[0] not in idQryList:
                    #ignore
                    continue
                
                writer.writeNext(csvLine)

            
            #Now send off the CSV
            self.out = self.response.getOutputStream("text/csv")
            self.out.print(sw.toString())
            self.out.close()
コード例 #17
0
    def __activate__(self, context):
        self.log = context["log"]

        response = context["response"]
        writer = response.getPrintWriter("text/plain; charset=UTF-8")
        auth = context["page"].authentication
        sessionState = context["sessionState"]
        result = JsonObject()
        result.put("status", "error")
        result.put("message", "An unknown error has occurred")
        if auth.is_admin():
            services = context["Services"]
            formData = context["formData"]
            func = formData.get("func")
            oid = formData.get("oid")
            portalId = formData.get("portalId")
            portalManager = services.portalManager
            if func == "reharvest":
                if oid:
                    self.log.debug("Reharvesting object '{}'", oid)
                    self.sendMessage(oid)
                    result.put("status", "ok")
                    result.put("message", "Object '%s' queued for reharvest")
                elif portalId:
                    self.log.debug("Reharvesting view '{}'", portalId)
                    sessionState.set("reharvest/running/" + portalId, "true")
                    # TODO security filter - not necessary because this requires admin anyway?
                    portal = portalManager.get(portalId)
                    query = "*:*"
                    if portal.query != "":
                        query = portal.query
                    if portal.searchQuery != "":
                        if query == "*:*":
                            query = portal.searchQuery
                        else:
                            query = query + " AND " + portal.searchQuery
                    # query solr to get the objects to reharvest
                    rows = 25
                    req = SearchRequest(query)
                    req.setParam("fq", 'item_type:"object"')
                    req.setParam("rows", str(rows))
                    req.setParam("fl", "id")
                    done = False
                    count = 0
                    while not done:
                        req.setParam("start", str(count))
                        out = ByteArrayOutputStream()
                        services.indexer.search(req, out)
                        json = SolrResult(
                            ByteArrayInputStream(out.toByteArray()))
                        objectIds = HashSet(json.getFieldList("id"))
                        if not objectIds.isEmpty():
                            for oid in objectIds:
                                self.sendMessage(oid)
                        count = count + rows
                        total = json.getNumFound()
                        self.log.debug("Queued {} of {}...", min(count, total),
                                       total)
                        done = (count >= total)
                    sessionState.remove("reharvest/running/" + portalId)
                    result.put("status", "ok")
                    result.put(
                        "message",
                        "Objects in '%s' queued for reharvest" % portalId)
                else:
                    response.setStatus(500)
                    result.put("message",
                               "No object or view specified for reharvest")
            elif func == "reindex":
                if oid:
                    self.log.debug("Reindexing object '{}'", oid)
                    self.sendMessage(oid)
                    result.put("status", "ok")
                    result.put("message",
                               "Object '%s' queued for reindex" % oid)
                else:
                    response.setStatus(500)
                    result.put("message", "No object specified to reindex")
            else:
                response.setStatus(500)
                result.put("message", "Unknown action '%s'" % func)
        else:
            response.setStatus(500)
            result.put("message",
                       "Only administrative users can access this API")
        writer.println(result.toString())
        writer.close()
コード例 #18
0
ファイル: reportResult.py プロジェクト: nishen/redbox
    def __reportSearch(self):
        self.reportId = self.request.getParameter("id")
        self.format = self.request.getParameter("format")
        self.report = self.reportManager.getReports().get(self.reportId)
        self.reportQuery = self.report.getQueryAsString()
        self.log.debug("Report query: " + self.reportQuery)

        #Get a total number of records
        try:
            out = ByteArrayOutputStream()
            recnumreq = SearchRequest(self.reportQuery)
            recnumreq.setParam("rows", "0")
            self.indexer.search(recnumreq, out)
            recnumres = SolrResult(ByteArrayInputStream(out.toByteArray()))
            self.__rowsFoundSolr = "%s" % recnumres.getNumFound()
        except:
            self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (
                sys.exc_info()[0], sys.exc_info()[1])
            self.log.error(
                "Reporting threw an exception (report was %s): %s - %s" %
                (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return

        #Setup the main query
        req = SearchRequest(self.reportQuery)
        req.setParam("fq", 'item_type:"object"')
        req.setParam("fq", 'workflow_id:"dataset"')
        req.setParam("rows", self.__rowsFoundSolr)
        try:
            #Now do the master search
            out = ByteArrayOutputStream()
            self.indexer.search(req, out)
            self.__reportResult = SolrResult(
                ByteArrayInputStream(out.toByteArray()))
            self.__checkResults()
        except:
            self.errorMsg = "Query failure. The issue has been logged (%s - %s)." % (
                sys.exc_info()[0], sys.exc_info()[1])
            self.log.error(
                "Reporting threw an exception (report was %s): %s - %s" %
                (self.report.getLabel(), sys.exc_info()[0], sys.exc_info()[1]))
            return

        #At this point the display template has enough to go with.
        #We just need to handle the CSV now
        if (self.format == "csv"):
            #Setup the main query - we need to requery to make sure we return
            #only the required fields. We'll use the specific IDs that met the
            #__checkResults check
            req = SearchRequest(self.reportQuery)
            req.setParam("fq", 'item_type:"object"')
            req.setParam("fq", 'workflow_id:"dataset"')
            req.setParam("rows", self.__rowsFoundSolr)
            req.setParam("csv.mv.separator", ";")

            #we need to get a list of the matching IDs from Solr
            #this doesn't work for long queries so it's abandoned
            #but left here commented to make sure we don't try it again
            #idQry = ""
            #for item in self.getProcessedResultsList():
            #    idQry += item.get("id") + " OR "
            #req.setParam("fq", 'id:(%s)' % idQry[:len(idQry)-4])

            #Create a list of IDs for reference when preparing the CSV
            idQryList = []
            for item in self.getProcessedResultsList():
                idQryList.append(item.get("id"))

            #Setup SOLR query with the required fields
            self.fields = self.systemConfig.getArray("redbox-reports",
                                                     "csv-output-fields")
            #We must have an ID field and it must be the first field
            fieldString = "id,"
            if self.fields is not None:
                for field in self.fields:
                    fieldString = fieldString + field.get("field-name") + ","
                fieldString = fieldString[:-1]

            req.setParam("fl", fieldString)

            out = ByteArrayOutputStream()
            try:
                self.indexer.search(req, out, self.format)
            except:
                #We can't get the result back from SOLR so fail back to the template display
                self.errorMsg = "Query failure. Failed to load the data - this issue has been logged (%s - %s)." % (
                    sys.exc_info()[0], sys.exc_info()[1])
                self.log.error(
                    "Reporting threw an exception (report was %s); Error: %s - %s"
                    % (self.report.getLabel(), sys.exc_info()[0],
                       sys.exc_info()[1]))
                return
            try:
                csvResponseString = String(out.toByteArray(), "utf-8")
                csvResponseLines = csvResponseString.split("\n")
            except:
                #We can't get the result back from SOLR so fail back to the template display
                self.errorMsg = "Query failure. Failed to prepare the CSV - this issue has been logged (%s - %s)." % (
                    sys.exc_info()[0], sys.exc_info()[1])
                self.log.error(
                    "Reporting threw an exception (report was %s); Error: %s - %s"
                    % (self.report.getLabel(), sys.exc_info()[0],
                       sys.exc_info()[1]))
                return

            fileName = self.urlEncode(self.report.getLabel())
            self.log.debug("Generating CSV report with file name: " + fileName)
            self.response.setHeader("Content-Disposition",
                                    "attachment; filename=%s.csv" % fileName)

            sw = StringWriter()
            parser = CSVParser()
            writer = CSVWriter(sw)
            count = 0

            prevLine = ""
            badRowFlag = False

            for line in csvResponseLines:
                if badRowFlag:
                    #In this section of code we'll handle errors by either trying to fix the problem
                    #or by adding an error line in the CSV. We'll then move to the next row and keep going
                    try:
                        self.log.debug(
                            "Reporting - trying to append the previous line with the previous faulty one. Line appears as: %s"
                            % prevLine + line)
                        csvLine = parser.parseLine(prevLine + line)
                        badRowFlag = False
                        prevLine = ""
                        self.log.debug(
                            "Reporting - remedy appears to have worked. Line appears as: %s"
                            % prevLine + line)
                    except:
                        #We tried to rescue the file but failed on the second run so give up
                        writer.writeNext(
                            ["Failed to transfer record to CSV - check logs"])
                        self.log.error(
                            "Reporting threw an exception (report was %s); Error: %s - %s; Result line: %s"
                            % (self.report.getLabel(), sys.exc_info()[0],
                               sys.exc_info()[1], prevLine + line))
                else:
                    try:
                        csvLine = parser.parseLine(line)
                        badRowFlag = False
                        prevLine = ""
                    except:
                        #This can happen if there's a newline in the index data
                        #so we raise the badRowFlag and see if we can join this
                        #row to the next one to fix it
                        self.log.debug(
                            "Reporting threw an exception but I'll see if it's just a formatting issue (report was %s); Error: %s - %s; Result line: %s"
                            % (self.report.getLabel(), sys.exc_info()[0],
                               sys.exc_info()[1], line))
                        badRowFlag = True
                        prevLine = line
                        continue

                if count == 0:
                    #Header row
                    count += 1
                    for idx, csvValue in enumerate(csvLine):
                        csvLine[idx] = self.findDisplayLabel(csvValue)
                elif csvLine[0] not in idQryList:
                    #ignore
                    continue

                writer.writeNext(csvLine)

            #Now send off the CSV
            self.out = self.response.getOutputStream("text/csv")
            self.out.print(sw.toString())
            self.out.close()
コード例 #19
0
class OaiData:
    def __init__(self):
        self.tokensDB = None

    def __activate__(self, context):
        if self.tokensDB is None:
            self.tokensDB = TokensDatabase(context)

        # Set up configuration
        self.systemConfig = JsonSimpleConfig()
        self.oaiConfig = None
        self.getMetadataFormats()

        self.velocityContext = context
        self.services = context["Services"]
        self.log = context["log"]
        self.sessionState = context["sessionState"]
        self.portalDir = context["portalDir"]

        self.__result = None
        self.lastPage = False

        # Check if the OAI request has an overriding portal ('set') to the URL
        paramSet = self.vc("formData").get("set")
        self.__portalName = context["page"].getPortal().getName()
        illegalSet = False
        if paramSet is not None:
            portals = self.vc("page").getPortals().keySet()
            if portals.contains(paramSet):
                self.__portalName = paramSet
            else:
                illegalSet = True

        self.__metadataPrefix = ""
        self.__sessionExpiry = self.systemConfig.getInteger(
            None, ["portal", "oai-pmh", "sessionExpiry"])

        # Check if there's a resumption token in the formData
        self.__currentToken = None
        resumptionToken = self.vc("formData").get("resumptionToken")

        if resumptionToken is not None:
            token = self.tokensDB.getToken(resumptionToken)
            self.__currentToken = token

        # Process/parse the request we've received for validity
        self.vc("request").setAttribute("Content-Type", "text/xml")
        self.__request = OaiPmhVerb(context, self.tokensDB,
                                    self.__currentToken)
        if self.getError() is None and illegalSet:
            self.__request.setError("badArgument",
                                    "Set '%s' is not valid!" % paramSet)

        # If there are no errors... and the request requires some additional
        #  data (like a search result) do so now. Everything else can be
        #  handled in the templates.
        if self.getError() is None and \
                self.getVerb() in ["GetRecord", "ListIdentifiers", "ListRecords"]:

            # Find the metadata prefix requested
            self.__metadataPrefix = self.vc("formData").get("metadataPrefix")
            if self.__metadataPrefix is None:
                self.__metadataPrefix = self.__currentToken.getMetadataPrefix()

            if resumptionToken is None:
                self.__buildResumptionTokenSets()
            else:
                self.__result = SolrResult(self.__currentToken.getResultJson())

            # Only list records if the metadata format is enabled in this view
            if self.isInView(self.__metadataPrefix) == False:
                self.__result = None

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.log.error("ERROR: Requested context entry '" + index +
                           "' doesn't exist")
            return None

    def isInView(self, format, view=None):
        # Sanity check
        if format is None or format == "":
            return False
        # Default to current poral
        if view is None:
            view = self.__portalName

        # Make sure there is some config for this format
        formatConfig = self.getMetadataFormats().get(format)
        if formatConfig is None:
            return False
        # Is it visible everywhere?
        allViews = formatConfig.getBoolean(False, ["enabledInAllViews"])
        if allViews:
            return True
        # Check if it is visible in this view
        else:
            allowedViews = formatConfig.getStringList(["enabledViews"])
            if view in allowedViews:
                return True
        # Rejection
        return False

    def getID(self, item):
        identifier = item.getFirst("oai_identifier")
        # Fallback to the default
        if identifier is None or identifier == "":
            return "oai:fascinator.usq.edu.au:" + item.getFirst("id")
        # Use the indexed value
        return identifier

    def isDeleted(self, item):
        return bool(item.getFirst("oai_deleted"))

    def getSet(self, item):
        set = item.getFirst("oai_set")
        # Fallback to the portal name
        if set is None or set == "":
            return self.__portalName
        # Use the required set
        return set

    def getVerb(self):
        return self.getRequest().getVerb()

    def getError(self):
        return self.getRequest().getError()

    def getResponseDate(self):
        return time.strftime("%Y-%m-%dT%H:%M:%SZ")

    def getRequest(self):
        return self.__request

    def getResult(self):
        return self.__result

    def getElement(self, elementName, values):
        elementStr = ""
        if values:
            for value in values:
                elementStr += "<%s>%s</%s>" % (elementName, value, elementName)
        return elementStr

    def __buildResumptionTokenSets(self):
        self.__result = SolrResult(None)

        portal = self.services.getPortalManager().get(self.__portalName)
        recordsPerPage = portal.recordsPerPage
        # Resolve our identifier
        id = self.vc("formData").get("identifier")
        query = "*:*"
        if id is not None and id != "":
            # A default TF2 OID
            if id.startswith("oai:fascinator.usq.edu.au:"):
                idString = id.replace("oai:fascinator.usq.edu.au:", "")
                idString = self.__escapeQuery(idString)
                query = "id:" + idString
            # Or a custom OAI ID
            else:
                idString = self.__escapeQuery(id)
                query = "oai_identifier:" + idString

        req = SearchRequest(query)
        req.setParam("facet", "true")
        req.setParam("rows", str(recordsPerPage))
        req.setParam("facet.field", portal.facetFieldList)
        req.setParam("facet.limit", str(portal.facetCount))
        req.setParam("sort", "f_dc_title asc")

        portalQuery = portal.query
        if portalQuery:
            req.addParam("fq", portalQuery)
        req.addParam("fq", "item_type:object")

        # Date data... is supplied
        fromDate = self.__request.getFromDate()
        untilDate = self.__request.getUntilDate()
        if fromDate is not None:
            fromStr = fromDate.isoformat() + "Z"
            self.log.debug("From Date: '{}'", fromStr)
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilStr)
                queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
            else:
                queryStr = "last_modified:[%s TO *]" % (fromStr)
            self.log.debug("Date query: '{}'", queryStr)
            req.addParam("fq", queryStr)
        else:
            if untilDate is not None:
                untilStr = untilDate.isoformat() + "Z"
                self.log.debug("Until Date: '{}'", untilDate.isoformat())
                queryStr = "last_modified:[* TO %s]" % (untilStr)
                self.log.debug("Date query: '{}'", queryStr)
                req.addParam("fq", queryStr)

        # Check if there's resumption token exist in the formData
        start = 0

        req.setParam("start", str(start))

        out = ByteArrayOutputStream()
        self.services.indexer.search(req, out)
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

        totalFound = self.__result.getNumFound()
        self.log.debug("Total found:" + str(totalFound))
        if totalFound > recordsPerPage:

            startRow = 0
            random.seed()
            resumptionToken = "%016x" % random.getrandbits(128)

            nextResumptionToken = resumptionToken
            firstLoop = True
            while True:
                self.log.debug("Current Resumption Token: " + resumptionToken)
                req.setParam("start", str(startRow))
                out = ByteArrayOutputStream()
                self.services.indexer.search(req, out)
                result = SolrResult(ByteArrayInputStream(out.toByteArray()))

                tokenObject = ResumptionToken(resumptionToken,
                                              self.__metadataPrefix,
                                              nextResumptionToken,
                                              result.toString())

                if firstLoop:
                    self.__currentToken = ResumptionToken(
                        None, self.__metadataPrefix, resumptionToken, None)
                    tokenObject = None
                    firstLoop = False

                startRow = startRow + recordsPerPage

                if startRow > totalFound:
                    tokenObject = ResumptionToken(resumptionToken,
                                                  self.__metadataPrefix, "",
                                                  result.toString())
                    self.tokensDB.storeToken(tokenObject)
                    break
                if tokenObject is not None:
                    self.tokensDB.storeToken(tokenObject)

                resumptionToken = nextResumptionToken
                nextResumptionToken = "%016x" % random.getrandbits(128)
                self.log.debug("Resumption Token: " + resumptionToken +
                               " next resumption token:" + nextResumptionToken)

    def getToken(self):
        if self.isInView(self.__metadataPrefix) and not self.lastPage:
            return self.__currentToken
        return None

    def getMetadataFormats(self):
        if self.oaiConfig is None:
            self.oaiConfig = self.systemConfig.getJsonSimpleMap(
                ["portal", "oai-pmh", "metadataFormats"])
        return self.oaiConfig

    def encodeXml(self, string):
        return StringEscapeUtils.escapeXml(string)

    def getPayload(self, oid, metadataFileName):
        # First get the Object from storage
        object = None
        try:
            object = self.services.getStorage().getObject(oid)
        except StorageException, e:
            return None

        # Check whether the payload exists
        try:
            return object.getPayload(metadataFileName)
        except StorageException, e:
            return None
コード例 #20
0
ファイル: reharvest.py プロジェクト: Deakin/the-fascinator
    def __activate__(self, context):
        response = context["response"]
        log = context["log"]
        writer = response.getPrintWriter("text/plain; charset=UTF-8")
        auth = context["page"].authentication
        sessionState = context["sessionState"]

        result = JsonObject()
        result.put("status", "error")
        result.put("message", "An unknown error has occurred")

        if auth.is_admin():
            services = context["Services"]
            formData = context["formData"]
            func = formData.get("func")
            oid = formData.get("oid")
            portalId = formData.get("portalId")
            portalManager = services.portalManager

            if func == "reharvest":
                # One object
                if oid:
                    log.info(" * Reharvesting object '{}'", oid)
                    portalManager.reharvest(oid)
                    result.put("status", "ok")
                    result.put("message", "Object '%s' queued for reharvest")

                # The whole portal
                elif portalId:
                    log.info(" * Reharvesting view '{}'", portalId)
                    sessionState.set("reharvest/running/" + portalId, "true")
                    # TODO security filter - not necessary because this requires admin anyway?
                    portal = portalManager.get(portalId)
                    query = "*:*"
                    if portal.query != "":
                        query = portal.query
                    if portal.searchQuery != "":
                        if query == "*:*":
                            query = portal.searchQuery
                        else:
                            query = query + " AND " + portal.searchQuery
                    # query solr to get the objects to reharvest
                    rows = 25
                    req = SearchRequest(query)
                    req.setParam("fq", 'item_type:"object"')
                    req.setParam("rows", str(rows))
                    req.setParam("fl", "id")
                    done = False
                    count = 0
                    while not done:
                        req.setParam("start", str(count))
                        out = ByteArrayOutputStream()
                        services.indexer.search(req, out)
                        json = SolrResult(ByteArrayInputStream(out.toByteArray()))
                        objectIds = HashSet(json.getFieldList("id"))
                        if not objectIds.isEmpty():
                            portalManager.reharvest(objectIds)
                        count = count + rows
                        total = json.getNumFound()
                        log.info(" * Queued {} of {}...", (min(count, total), total))
                        done = (count >= total)
                    sessionState.remove("reharvest/running/" + portalId)
                    result.put("status", "ok")
                    result.put("message", "Objects in '%s' queued for reharvest" % portalId)
                else:
                    response.setStatus(500)
                    result.put("message", "No object or view specified for reharvest")

            elif func == "reindex":
                if oid:
                    log.info(" * Reindexing object '{}'", oid)
                    services.indexer.index(oid)
                    services.indexer.commit()
                    result.put("status", "ok")
                    result.put("message", "Object '%s' queued for reindex" % portalId)
                else:
                    response.setStatus(500)
                    result.put("message", "No object specified to reindex")
            else:
                response.setStatus(500)
                result.put("message", "Unknown action '%s'" % func)
        else:
            response.setStatus(500)
            result.put("message", "Only administrative users can access this API")
        writer.println(result.toString())
        writer.close()
コード例 #21
0
class HomeData:
    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.__latest = None
        self.__mine = None
        self.__workflows = None
        self.__result = None
        self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            print "ERROR: Requested context entry '" + index + "' doesn't exist"
            return None

    def __search(self):
        indexer = self.services.getIndexer()
        portalQuery = self.services.getPortalManager().get(self.vc("portalId")).getQuery()
        portalSearchQuery = self.services.getPortalManager().get(self.vc("portalId")).getSearchQuery()
        
        # Security prep work
        current_user = self.vc("page").authentication.get_username()
        security_roles = self.vc("page").authentication.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        isAdmin = self.vc("page").authentication.is_admin()

        req = SearchRequest("last_modified:[NOW-1MONTH TO *]")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))
        
        req = SearchRequest(owner_query)
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__mine = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req = SearchRequest('workflow_security:"' + current_user + '"')
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", "10")
        req.setParam("sort", "last_modified desc, f_dc_title asc");
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        self.__workflows = SolrResult(ByteArrayInputStream(out.toByteArray()))

        req = SearchRequest("*:*")
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.addParam("fq", "")
        req.setParam("rows", "0")
        if not isAdmin:
            req.addParam("fq", security_query)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        
        self.vc("sessionState").set("fq", 'item_type:"object"')
        #sessionState.set("query", portalQuery.replace("\"", "'"))
        
        self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))
    
    def getLatest(self):
        return self.__latest.getResults()
    
    def getMine(self):
        return self.__mine.getResults()

    def getWorkflows(self):
        return self.__workflows.getResults()

    def getItemCount(self):
        return self.__result.getNumFound()
コード例 #22
0
class HarvestreportData:
    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context
        self.vc("sessionState").remove("fq")
        self.services = self.vc("Services")
        self.request = self.vc("request")
        self.log = context["log"]
        self.__harvestedRecords = None
        
        uri = URLDecoder.decode(self.request.getAttribute("RequestURI"))
        self.__harvestId = os.path.basename(uri)
        self.__search()

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            print "ERROR: Requested context entry '" + index + "' doesn't exist"
            return None

    def __search(self):
        indexer = self.services.getIndexer()
        
        # Security prep work
        isAdmin = self.vc("page").authentication.is_admin()
        if not isAdmin:
            print "ERROR: User is not an admin '"
            return None

        req = SearchRequest('harvestId:"' + self.__harvestId + '"')
        req.setParam("fq", 'eventType:modify')
        out = ByteArrayOutputStream()
        try:
            indexer.searchByIndex(req, out, "eventLog")
        except:
            print traceback.format_exc();
            print repr(traceback.print_exc())
            traceback.print_stack(file=sys.stdout)
        self.__harvestedRecords = SolrResult(ByteArrayInputStream(out.toByteArray()))
        
        req = SearchRequest('harvestId:"' + self.__harvestId + '"')
        req.setParam("fq", 'eventType:modify')
        req.setParam("fq", 'isNew:true')
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__latest = SolrResult(ByteArrayInputStream(out.toByteArray()))
        
        req = SearchRequest('harvestId:"' + self.__harvestId + '"')
        req.setParam("fq", 'eventType:modify')
        req.setParam("fq", 'isModified:true')
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__modified = SolrResult(ByteArrayInputStream(out.toByteArray()))
        
        req = SearchRequest('harvestId:"' + self.__harvestId + '" AND eventType:"modify" AND isModified:false')
        req.setParam("fq", 'isNew:false')
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__unmodified = SolrResult(ByteArrayInputStream(out.toByteArray()))
        
        req = SearchRequest('harvestId:"' + self.__harvestId + '"')
        req.setParam("fq", 'eventType:harvestEnd')
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        endItem = SolrResult(ByteArrayInputStream(out.toByteArray()))
        endTimeList = endItem.getFieldList('eventTime')
        
        date = None;
        repoType = None;
        repoName = None;
        
        if(endTimeList.size() > 0):
            date = endTimeList.get(0)
        
        repoTypeList = endItem.getFieldList('repository_type')
        if(repoTypeList.size() > 0):
            repoType = repoTypeList.get(0)
            
        repoNameList = endItem.getFieldList('repository_name')
        if(repoTypeList.size() > 0):
            repoName = repoNameList.get(0)
        
        req = SearchRequest('repository_type:"' + repoType + '" AND repository_name:"' + repoName + '" AND eventType:"modify" AND isNew:true')  
        req.setParam("fq", "eventTime:[* TO " + date + "]")
        out = ByteArrayOutputStream()
        indexer.searchByIndex(req, out, "eventLog")
        self.__allRecords = SolrResult(ByteArrayInputStream(out.toByteArray()))
        

    def getItemcount(self):
        return self.__harvestedRecords.getNumFound()
    
    def getNewcount(self):
        return self.__latest.getNumFound()
    
    def getModifiedcount(self):
        return self.__modified.getNumFound()
    
    def getUnmodifiedcount(self):
        return self.__unmodified.getNumFound()
    
    def getTotalcount(self):
        return self.__allRecords.getNumFound()