def __search(self):
    """Load the event-log search results for the current harvest.

    Returns None without searching when the current user is not an
    administrator. Otherwise every search is run against the "eventLog"
    index and the results are stored on the instance as SolrResult
    objects:

    * ``__harvestedRecords`` - events for this harvest id
      (filter ``eventType:modify``)
    * ``__latest`` - as above, with the ``isNew:true`` filter
    * ``__modified`` - as above, with the ``isModified:true`` filter
    * ``__unmodified`` - modify events with ``isModified:false`` and the
      ``isNew:false`` filter
    * ``__allRecords`` - new-record modify events of the same repository
      (type and name taken from the harvestEnd event) up to the harvest
      end time
    """
    indexer = self.services.getIndexer()

    # Security prep work
    isAdmin = self.vc("page").authentication.is_admin()
    if not isAdmin:
        print("ERROR: User is not an admin '")
        return None

    def run_query(query, filters, report_errors=False):
        # Run one search against the "eventLog" index and wrap the raw
        # response bytes in a SolrResult.
        request = SearchRequest(query)
        # NOTE(review): each filter is applied with setParam("fq", ...),
        # exactly as before. If setParam replaces an existing value, only
        # the last filter of each list is effective - confirm against the
        # SearchRequest API whether addParam was intended.
        for filter_query in filters:
            request.setParam("fq", filter_query)
        out = ByteArrayOutputStream()
        try:
            indexer.searchByIndex(request, out, "eventLog")
        except:
            if not report_errors:
                raise
            # Best effort: report the failure and carry on with whatever
            # was written to the output stream.
            print(traceback.format_exc())
            traceback.print_stack(file=sys.stdout)
        return SolrResult(ByteArrayInputStream(out.toByteArray()))

    harvestQuery = 'harvestId:"' + self.__harvestId + '"'

    self.__harvestedRecords = run_query(
        harvestQuery, ['eventType:modify'], report_errors=True)
    self.__latest = run_query(
        harvestQuery, ['eventType:modify', 'isNew:true'])
    self.__modified = run_query(
        harvestQuery, ['eventType:modify', 'isModified:true'])
    self.__unmodified = run_query(
        harvestQuery + ' AND eventType:"modify" AND isModified:false',
        ['isNew:false'])

    # The harvestEnd event carries the end time and the repository the
    # harvest ran against.
    endItem = run_query(harvestQuery, ['eventType:harvestEnd'])

    date = None
    repoType = None
    repoName = None

    endTimeList = endItem.getFieldList('eventTime')
    if endTimeList.size() > 0:
        date = endTimeList.get(0)

    repoTypeList = endItem.getFieldList('repository_type')
    if repoTypeList.size() > 0:
        repoType = repoTypeList.get(0)

    repoNameList = endItem.getFieldList('repository_name')
    # Guard on the list that is actually read (the original checked
    # repoTypeList here).
    if repoNameList.size() > 0:
        repoName = repoNameList.get(0)

    # NOTE(review): if the harvestEnd event is missing any of these
    # fields the concatenations below raise TypeError - decide how a
    # harvest without an end event should be reported.
    self.__allRecords = run_query(
        'repository_type:"' + repoType
        + '" AND repository_name:"' + repoName
        + '" AND eventType:"modify" AND isNew:true',
        ["eventTime:[* TO " + date + "]"])
def __activate__(self, context):
    """Handle an admin-only reharvest / reindex request.

    Reads ``func``, ``oid`` and ``portalId`` from ``context["formData"]``
    and writes a JSON object with ``status`` and ``message`` entries to
    the response as ``text/plain``:

    * ``func == "reharvest"`` with ``oid``: reharvest that object through
      the portal manager.
    * ``func == "reharvest"`` with ``portalId``: page through the index
      for every object matching the portal's query and reharvest each
      page of ids.
    * ``func == "reindex"`` with ``oid``: index the object and commit.

    Non-admin users, unknown actions and missing identifiers set the
    response status to 500 and leave ``status`` as "error".
    """
    response = context["response"]
    log = context["log"]
    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    auth = context["page"].authentication
    sessionState = context["sessionState"]

    result = JsonObject()
    result.put("status", "error")
    result.put("message", "An unknown error has occurred")

    if auth.is_admin():
        services = context["Services"]
        formData = context["formData"]
        func = formData.get("func")
        oid = formData.get("oid")
        portalId = formData.get("portalId")
        portalManager = services.portalManager

        if func == "reharvest":
            # One object
            if oid:
                log.info(" * Reharvesting object '{}'", oid)
                portalManager.reharvest(oid)
                result.put("status", "ok")
                result.put("message",
                           "Object '%s' queued for reharvest" % oid)
            # The whole portal
            elif portalId:
                log.info(" * Reharvesting view '{}'", portalId)
                runningKey = "reharvest/running/" + portalId
                sessionState.set(runningKey, "true")
                try:
                    # TODO security filter - not necessary because this
                    # requires admin anyway?
                    portal = portalManager.get(portalId)
                    query = "*:*"
                    if portal.query != "":
                        query = portal.query
                    if portal.searchQuery != "":
                        if query == "*:*":
                            query = portal.searchQuery
                        else:
                            query = query + " AND " + portal.searchQuery

                    # query solr to get the objects to reharvest
                    rows = 25
                    req = SearchRequest(query)
                    req.setParam("fq", 'item_type:"object"')
                    req.setParam("rows", str(rows))
                    req.setParam("fl", "id")
                    done = False
                    count = 0
                    while not done:
                        req.setParam("start", str(count))
                        out = ByteArrayOutputStream()
                        services.indexer.search(req, out)
                        page = SolrResult(
                            ByteArrayInputStream(out.toByteArray()))
                        objectIds = HashSet(page.getFieldList("id"))
                        if not objectIds.isEmpty():
                            portalManager.reharvest(objectIds)
                        count = count + rows
                        total = page.getNumFound()
                        # Two separate arguments, one per placeholder.
                        log.info(" * Queued {} of {}...",
                                 min(count, total), total)
                        done = (count >= total)
                finally:
                    # Clear the flag even when the search fails, so the
                    # session does not report a reharvest as running
                    # forever.
                    sessionState.remove(runningKey)
                result.put("status", "ok")
                result.put("message",
                           "Objects in '%s' queued for reharvest" % portalId)
            else:
                response.setStatus(500)
                result.put("message",
                           "No object or view specified for reharvest")
        elif func == "reindex":
            if oid:
                log.info(" * Reindexing object '{}'", oid)
                services.indexer.index(oid)
                services.indexer.commit()
                result.put("status", "ok")
                result.put("message",
                           "Object '%s' queued for reindex" % oid)
            else:
                response.setStatus(500)
                result.put("message", "No object specified to reindex")
        else:
            response.setStatus(500)
            result.put("message", "Unknown action '%s'" % func)
    else:
        response.setStatus(500)
        result.put("message",
                   "Only administrative users can access this API")

    writer.println(result.toString())
    writer.close()
def __activate__(self, context):
    """Handle an admin-only reharvest / reindex request via messaging.

    Reads ``func``, ``oid`` and ``portalId`` from ``context["formData"]``
    and writes a JSON object with ``status`` and ``message`` entries to
    the response as ``text/plain``:

    * ``func == "reharvest"`` with ``oid``: pass the object id to
      ``self.sendMessage``.
    * ``func == "reharvest"`` with ``portalId``: page through the index
      for every object matching the portal's query and pass each id to
      ``self.sendMessage``.
    * ``func == "reindex"`` with ``oid``: pass the object id to
      ``self.sendMessage``.

    Non-admin users, unknown actions and missing identifiers set the
    response status to 500 and leave ``status`` as "error".
    """
    self.log = context["log"]
    response = context["response"]
    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    auth = context["page"].authentication
    sessionState = context["sessionState"]

    result = JsonObject()
    result.put("status", "error")
    result.put("message", "An unknown error has occurred")

    if auth.is_admin():
        services = context["Services"]
        formData = context["formData"]
        func = formData.get("func")
        oid = formData.get("oid")
        portalId = formData.get("portalId")
        portalManager = services.portalManager

        if func == "reharvest":
            if oid:
                self.log.debug("Reharvesting object '{}'", oid)
                self.sendMessage(oid)
                result.put("status", "ok")
                result.put("message",
                           "Object '%s' queued for reharvest" % oid)
            elif portalId:
                self.log.debug("Reharvesting view '{}'", portalId)
                runningKey = "reharvest/running/" + portalId
                sessionState.set(runningKey, "true")
                try:
                    # TODO security filter - not necessary because this
                    # requires admin anyway?
                    portal = portalManager.get(portalId)
                    query = "*:*"
                    if portal.query != "":
                        query = portal.query
                    if portal.searchQuery != "":
                        if query == "*:*":
                            query = portal.searchQuery
                        else:
                            query = query + " AND " + portal.searchQuery

                    # query solr to get the objects to reharvest
                    rows = 25
                    req = SearchRequest(query)
                    req.setParam("fq", 'item_type:"object"')
                    req.setParam("rows", str(rows))
                    req.setParam("fl", "id")
                    done = False
                    count = 0
                    while not done:
                        req.setParam("start", str(count))
                        out = ByteArrayOutputStream()
                        services.indexer.search(req, out)
                        page = SolrResult(
                            ByteArrayInputStream(out.toByteArray()))
                        objectIds = HashSet(page.getFieldList("id"))
                        if not objectIds.isEmpty():
                            # Separate loop name so the request's own
                            # "oid" is not overwritten.
                            for objectId in objectIds:
                                self.sendMessage(objectId)
                        count = count + rows
                        total = page.getNumFound()
                        self.log.debug("Queued {} of {}...",
                                       min(count, total), total)
                        done = (count >= total)
                finally:
                    # Clear the flag even when the search fails, so the
                    # session does not report a reharvest as running
                    # forever.
                    sessionState.remove(runningKey)
                result.put("status", "ok")
                result.put("message",
                           "Objects in '%s' queued for reharvest" % portalId)
            else:
                response.setStatus(500)
                result.put("message",
                           "No object or view specified for reharvest")
        elif func == "reindex":
            if oid:
                self.log.debug("Reindexing object '{}'", oid)
                self.sendMessage(oid)
                result.put("status", "ok")
                result.put("message",
                           "Object '%s' queued for reindex" % oid)
            else:
                response.setStatus(500)
                result.put("message", "No object specified to reindex")
        else:
            response.setStatus(500)
            result.put("message", "Unknown action '%s'" % func)
    else:
        response.setStatus(500)
        result.put("message",
                   "Only administrative users can access this API")

    writer.println(result.toString())
    writer.close()