コード例 #1
0
    def __search(self):
        """Load the event-log result sets describing the current harvest.

        Runs a series of searches against the "eventLog" index for the
        harvest identified by ``self.__harvestId`` and stores the results on
        the instance:

        * ``__harvestedRecords`` - every ``modify`` event of the harvest
        * ``__latest``           - ``modify`` events flagged ``isNew:true``
        * ``__modified``         - ``modify`` events flagged ``isModified:true``
        * ``__unmodified``       - ``modify`` events that are neither new nor
                                   modified
        * ``__allRecords``       - new-record events for the same repository
                                   up to the time this harvest ended, or
                                   ``None`` when no usable ``harvestEnd``
                                   event was found

        Nothing is loaded when the current user is not an administrator.
        Always returns ``None``.
        """
        indexer = self.services.getIndexer()

        # Security prep work
        if not self.vc("page").authentication.is_admin():
            print("ERROR: User is not an admin")
            return None

        harvestQuery = 'harvestId:"' + self.__harvestId + '"'

        # Only this first search tolerates an indexer failure (as before);
        # the later ones let the exception propagate to the caller.
        self.__harvestedRecords = self.__searchEventLog(
            indexer, harvestQuery, 'eventType:modify', True)

        # Both restrictions are folded into a single "fq" value. The previous
        # code issued two setParam("fq", ...) calls in a row, which appears to
        # replace the first filter rather than add to it.
        # NOTE(review): confirm against SearchRequest.setParam/addParam.
        self.__latest = self.__searchEventLog(
            indexer, harvestQuery, 'eventType:modify AND isNew:true')
        self.__modified = self.__searchEventLog(
            indexer, harvestQuery, 'eventType:modify AND isModified:true')
        self.__unmodified = self.__searchEventLog(
            indexer,
            harvestQuery + ' AND eventType:"modify" AND isModified:false',
            'isNew:false')

        # The harvestEnd event supplies the end time and the repository
        # details used to scope the final "all records" search.
        endItem = self.__searchEventLog(
            indexer, harvestQuery, 'eventType:harvestEnd')
        date = self.__firstFieldValue(endItem, 'eventTime')
        repoType = self.__firstFieldValue(endItem, 'repository_type')
        repoName = self.__firstFieldValue(endItem, 'repository_name')

        if date is None or repoType is None or repoName is None:
            # Without these values the query below cannot be built (string
            # concatenation with None raises TypeError), so report and stop.
            print("ERROR: No complete harvestEnd event found for harvest '%s'"
                  % self.__harvestId)
            self.__allRecords = None
            return None

        self.__allRecords = self.__searchEventLog(
            indexer,
            'repository_type:"' + repoType + '" AND repository_name:"'
            + repoName + '" AND eventType:"modify" AND isNew:true',
            "eventTime:[* TO " + date + "]")

    def __searchEventLog(self, indexer, query, filterQuery,
                         tolerateFailure=False):
        """Run one query with one "fq" filter against the "eventLog" index.

        When ``tolerateFailure`` is true, an exception raised by the indexer
        is printed and the result is built from whatever output was written.
        Returns the response wrapped in a ``SolrResult``.
        """
        req = SearchRequest(query)
        req.setParam("fq", filterQuery)
        out = ByteArrayOutputStream()
        if tolerateFailure:
            try:
                indexer.searchByIndex(req, out, "eventLog")
            except:
                # Bare except kept on purpose: the failure may be a Java
                # exception thrown through the Jython bridge.
                print(traceback.format_exc())
        else:
            indexer.searchByIndex(req, out, "eventLog")
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    def __firstFieldValue(self, result, fieldName):
        """Return the first value of ``fieldName`` in ``result``, or None."""
        values = result.getFieldList(fieldName)
        if values.size() > 0:
            return values.get(0)
        return None
コード例 #2
0
ファイル: reharvest.py プロジェクト: Deakin/the-fascinator
    def __activate__(self, context):
        """Handle an admin-only reharvest/reindex request and reply with JSON.

        Reads ``func``, ``oid`` and ``portalId`` from ``context["formData"]``:

        * ``func == "reharvest"`` with ``oid``: reharvest that single object.
        * ``func == "reharvest"`` with ``portalId``: page through the index in
          batches and reharvest every object matching the portal's queries.
        * ``func == "reindex"`` with ``oid``: reindex that object and commit.

        A JSON object with ``status`` and ``message`` is written to the
        response. Every rejected request (non-admin user, unknown ``func``,
        missing parameters) sets HTTP status 500.
        """
        response = context["response"]
        log = context["log"]
        writer = response.getPrintWriter("text/plain; charset=UTF-8")
        auth = context["page"].authentication
        sessionState = context["sessionState"]

        # Pessimistic default, overwritten by whichever branch succeeds.
        result = JsonObject()
        result.put("status", "error")
        result.put("message", "An unknown error has occurred")

        if auth.is_admin():
            services = context["Services"]
            formData = context["formData"]
            func = formData.get("func")
            oid = formData.get("oid")
            portalId = formData.get("portalId")
            portalManager = services.portalManager

            if func == "reharvest":
                # One object
                if oid:
                    log.info(" * Reharvesting object '{}'", oid)
                    portalManager.reharvest(oid)
                    result.put("status", "ok")
                    result.put("message",
                               "Object '%s' queued for reharvest" % oid)

                # The whole portal
                elif portalId:
                    log.info(" * Reharvesting view '{}'", portalId)
                    runningKey = "reharvest/running/" + portalId
                    sessionState.set(runningKey, "true")
                    try:
                        # TODO security filter - not necessary because this
                        # requires admin anyway?
                        portal = portalManager.get(portalId)
                        query = "*:*"
                        if portal.query != "":
                            query = portal.query
                        if portal.searchQuery != "":
                            if query == "*:*":
                                query = portal.searchQuery
                            else:
                                query = query + " AND " + portal.searchQuery

                        # query solr to get the objects to reharvest
                        rows = 25
                        req = SearchRequest(query)
                        req.setParam("fq", 'item_type:"object"')
                        req.setParam("rows", str(rows))
                        req.setParam("fl", "id")
                        done = False
                        count = 0
                        while not done:
                            req.setParam("start", str(count))
                            out = ByteArrayOutputStream()
                            services.indexer.search(req, out)
                            page = SolrResult(
                                ByteArrayInputStream(out.toByteArray()))
                            objectIds = HashSet(page.getFieldList("id"))
                            if not objectIds.isEmpty():
                                portalManager.reharvest(objectIds)
                            count = count + rows
                            total = page.getNumFound()
                            # Two separate arguments: a single tuple would
                            # fill only the first '{}' placeholder.
                            log.info(" * Queued {} of {}...",
                                     min(count, total), total)
                            done = (count >= total)
                    finally:
                        # Clear the flag even if the search or queueing
                        # failed, so the view is not left marked as running.
                        sessionState.remove(runningKey)
                    result.put("status", "ok")
                    result.put("message",
                               "Objects in '%s' queued for reharvest"
                               % portalId)
                else:
                    response.setStatus(500)
                    result.put("message",
                               "No object or view specified for reharvest")

            elif func == "reindex":
                if oid:
                    log.info(" * Reindexing object '{}'", oid)
                    services.indexer.index(oid)
                    services.indexer.commit()
                    result.put("status", "ok")
                    # Report the object that was reindexed, not the portal.
                    result.put("message",
                               "Object '%s' queued for reindex" % oid)
                else:
                    response.setStatus(500)
                    result.put("message", "No object specified to reindex")
            else:
                response.setStatus(500)
                result.put("message", "Unknown action '%s'" % func)
        else:
            response.setStatus(500)
            result.put("message",
                       "Only administrative users can access this API")
        writer.println(result.toString())
        writer.close()
コード例 #3
0
    def __activate__(self, context):
        """Handle an admin-only reharvest/reindex request and reply with JSON.

        Reads ``func``, ``oid`` and ``portalId`` from ``context["formData"]``.
        Each object to process is handed to ``self.sendMessage``:

        * ``func == "reharvest"`` with ``oid``: that single object.
        * ``func == "reharvest"`` with ``portalId``: every object matching the
          portal's queries, fetched from the index in batches.
        * ``func == "reindex"`` with ``oid``: that single object.

        A JSON object with ``status`` and ``message`` is written to the
        response. Every rejected request (non-admin user, unknown ``func``,
        missing parameters) sets HTTP status 500.
        """
        self.log = context["log"]

        response = context["response"]
        writer = response.getPrintWriter("text/plain; charset=UTF-8")
        auth = context["page"].authentication
        sessionState = context["sessionState"]

        # Pessimistic default, overwritten by whichever branch succeeds.
        result = JsonObject()
        result.put("status", "error")
        result.put("message", "An unknown error has occurred")

        if auth.is_admin():
            services = context["Services"]
            formData = context["formData"]
            func = formData.get("func")
            oid = formData.get("oid")
            portalId = formData.get("portalId")
            portalManager = services.portalManager

            if func == "reharvest":
                if oid:
                    self.log.debug("Reharvesting object '{}'", oid)
                    self.sendMessage(oid)
                    result.put("status", "ok")
                    result.put("message",
                               "Object '%s' queued for reharvest" % oid)
                elif portalId:
                    self.log.debug("Reharvesting view '{}'", portalId)
                    runningKey = "reharvest/running/" + portalId
                    sessionState.set(runningKey, "true")
                    try:
                        # TODO security filter - not necessary because this
                        # requires admin anyway?
                        portal = portalManager.get(portalId)
                        query = "*:*"
                        if portal.query != "":
                            query = portal.query
                        if portal.searchQuery != "":
                            if query == "*:*":
                                query = portal.searchQuery
                            else:
                                query = query + " AND " + portal.searchQuery

                        # query solr to get the objects to reharvest
                        rows = 25
                        req = SearchRequest(query)
                        req.setParam("fq", 'item_type:"object"')
                        req.setParam("rows", str(rows))
                        req.setParam("fl", "id")
                        done = False
                        count = 0
                        while not done:
                            req.setParam("start", str(count))
                            out = ByteArrayOutputStream()
                            services.indexer.search(req, out)
                            page = SolrResult(
                                ByteArrayInputStream(out.toByteArray()))
                            # Distinct loop name so the request's own 'oid'
                            # parameter is not rebound while paging.
                            for objectId in HashSet(page.getFieldList("id")):
                                self.sendMessage(objectId)
                            count = count + rows
                            total = page.getNumFound()
                            self.log.debug("Queued {} of {}...",
                                           min(count, total), total)
                            done = (count >= total)
                    finally:
                        # Clear the flag even if the search or queueing
                        # failed, so the view is not left marked as running.
                        sessionState.remove(runningKey)
                    result.put("status", "ok")
                    result.put(
                        "message",
                        "Objects in '%s' queued for reharvest" % portalId)
                else:
                    response.setStatus(500)
                    result.put("message",
                               "No object or view specified for reharvest")
            elif func == "reindex":
                if oid:
                    self.log.debug("Reindexing object '{}'", oid)
                    self.sendMessage(oid)
                    result.put("status", "ok")
                    result.put("message",
                               "Object '%s' queued for reindex" % oid)
                else:
                    response.setStatus(500)
                    result.put("message", "No object specified to reindex")
            else:
                response.setStatus(500)
                result.put("message", "Unknown action '%s'" % func)
        else:
            response.setStatus(500)
            result.put("message",
                       "Only administrative users can access this API")
        writer.println(result.toString())
        writer.close()