def __getStorageId(self, oid):
    """Return the ``storage_id`` field of the indexed document for *oid*.

    Runs an ``id:"<oid>"`` search limited to the ``storage_id`` field and
    reads that field from the first entry of ``response/docs``.
    """
    request = SearchRequest('id:"%s"' % oid)
    request.addParam("fl", "storage_id")

    stream = ByteArrayOutputStream()
    Services.indexer.search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))

    firstDoc = response.getList("response/docs").get(0)
    return firstDoc.get("storage_id")
def __isIndexed(self, oid):
    """Return True when the index reports at least one object with id *oid*."""
    request = SearchRequest('id:"%s"' % oid)
    request.addParam("fq", 'item_type:"object"')

    stream = ByteArrayOutputStream()
    self.Services.indexer.search(request, stream)

    numFound = SolrResult(ByteArrayInputStream(stream.toByteArray())).getNumFound()
    return numFound != 0
def __activate__(self, context): response = context["response"] writer = response.getPrintWriter("text/plain; charset=UTF-8") auth = context["page"].authentication result = JsonConfigHelper() result.set("status", "error") result.set("message", "An unknown error has occurred") if auth.is_logged_in() and auth.is_admin(): services = context["Services"] formData = context["formData"] func = formData.get("func") oid = formData.get("oid") portalId = formData.get("portalId") portalManager = services.portalManager if func == "reharvest": if oid: print "Reharvesting object '%s'" % oid portalManager.reharvest("oid") result.set("status", "ok") result.set("message", "Object '%s' queued for reharvest") elif portalId: print " Reharvesting view '%s'" % portalId # TODO security filter # TODO this should loop through the whole portal, # not just the first page of results portal = portalManager.get(portalId) req = SearchRequest(portal.query) req.setParam("fq", 'item_type:"object"') out = ByteArrayOutputStream(); services.indexer.search(req, out) json = JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) objectIds = json.getList("response/docs//id") if not objectIds.isEmpty(): portalManager.reharvest(objectIds) result.set("status", "ok") result.set("message", "Objects in '%s' queued for reharvest" % portalId) else: response.setStatus(500) result.set("message", "No object or view specified for reharvest") elif func == "reindex": if oid: print "Reindexing object '%s'" % oid services.indexer.index(oid) services.indexer.commit() result.set("status", "ok") result.set("message", "Objects in '%s' queued for reharvest" % portalId) else: response.setStatus(500) result.set("message", "No object specified to reindex") else: response.setStatus(500) result.set("message", "Unknown action '%s'" % func) else: response.setStatus(500) result.set("message", "Only administrative users can access this API") writer.println(result.toString()) writer.close()
def search_solr(self):
    """Search the annotation index for this object's root URI(s).

    The query matches ``rootUri`` against ``self.rootUriList`` when it is
    non-empty, otherwise against ``self.rootUri``, optionally narrowed by
    ``self.type``. Tag-type results are handed to ``process_tags`` and
    everything else to ``process_response``.
    """
    if self.rootUriList:
        uriClause = "(" + " OR ".join(self.rootUriList) + ")"
    else:
        uriClause = "\"" + self.rootUri + "\""

    query = "(rootUri:" + uriClause
    if self.type:
        query += " AND type:\"" + self.type + "\""
    query += ")"
    #print "**********", query

    request = SearchRequest(query)
    request.setParam("facet", "false")
    request.setParam("rows", str(99999))
    request.setParam("sort", "dateCreated asc")
    request.setParam("start", str(0))

    #security_roles = page.authentication.get_roles_list();
    #security_query = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
    #req.addParam("fq", security_query)

    stream = ByteArrayOutputStream()
    Services.indexer.annotateSearch(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))

    # Every annotation for this URI
    annotations = response.getJsonList("response/docs")
    if self.type == "http://www.purl.org/anotar/ns/type/0.1#Tag":
        return self.process_tags(annotations)
    return self.process_response(annotations)
def __loadSolrData(self, oid):
    """Return a ``SolrResult`` for *oid* restricted to the current portal.

    The portal's search query (when set) is ANDed onto the id query, and the
    portal's query is always added as a filter query.
    """
    portal = self.vc("page").getPortal()

    query = 'id:"%s"' % oid
    if portal.getSearchQuery():
        query = query + " AND " + portal.getSearchQuery()

    request = SearchRequest(query)
    for filterQuery in ('item_type:"object"', portal.getQuery()):
        request.addParam("fq", filterQuery)

    stream = ByteArrayOutputStream()
    self.vc("Services").getIndexer().search(request, stream)
    return SolrResult(ByteArrayInputStream(stream.toByteArray()))
def __loadSolrData(self, oid):
    """Load the index entry for *oid* into ``self.__solrData``.

    Portal constraints (search query and filter query) are only applied
    when ``self.isDetail()`` is true.
    """
    portal = self.page.getPortal()

    query = 'id:"%s"' % oid
    if self.isDetail() and portal.getSearchQuery():
        query = query + " AND " + portal.getSearchQuery()

    request = SearchRequest(query)
    request.addParam("fq", 'item_type:"object"')
    if self.isDetail():
        request.addParam("fq", portal.getQuery())

    stream = ByteArrayOutputStream()
    self.services.getIndexer().search(request, stream)
    payload = ByteArrayInputStream(stream.toByteArray())
    self.__solrData = JsonConfigHelper(payload)
def __getMetadata(self, oid):
    """Return the first indexed object document matching ``id:<oid>``."""
    request = SearchRequest('id:%s' % oid)
    request.setParam("fq", 'item_type:"object"')

    # Make sure 'fq' has already been set in the session
    ##security_roles = self.authentication.get_roles_list();
    ##security_query = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
    ##req.addParam("fq", security_query)

    stream = ByteArrayOutputStream()
    self.__indexer.search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    #self.log.info("result={}", result.toString())

    docs = response.getJsonList("response/docs")
    return docs.get(0)
def __isLinked(self, ids, map):
    """Flag author documents in *map* that a master record already links to.

    Searches master records whose ``package_node_id`` matches any of *ids*,
    collects every ``package_node_id`` they carry, and sets ``linked`` to
    ``"true"`` on each document in *map* whose ``id`` is in that collection.
    *map* may be a Java ``LinkedHashMap`` (author -> iterable of docs) or a
    Python mapping (author -> container indexed by its own iteration keys).
    """
    request = SearchRequest('package_node_id:("' + '" OR "'.join(ids) + '")')
    request.setParam("fq", 'recordtype:"master"')
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "9999")

    stream = ByteArrayOutputStream()
    self.__indexer.search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))

    linkedIds = []
    for masterDoc in response.getJsonList("response/docs"):
        linkedIds.extend(masterDoc.getList("package_node_id"))

    def flagIfLinked(candidate):
        # Mark a single author document when a master record references it
        if candidate.get("id") in linkedIds:
            candidate.set("linked", "true")

    if type(map).__name__ == "LinkedHashMap":
        for author in map.keySet():
            for authorDoc in map.get(author):
                flagIfLinked(authorDoc)
    else:
        for author in map.keys():
            authorList = map[author]
            for index in authorList:
                flagIfLinked(authorList[index])
def __getAuthorDetails(self, citationIds):
    """Return the indexed author documents whose id is in *citationIds*."""
    idClause = " OR id:".join(citationIds)
    request = SearchRequest('id:%s' % idClause)
    request.setParam("fq", 'recordtype:"author"')
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "9999")

    # Make sure 'fq' has already been set in the session
    ##security_roles = self.authentication.get_roles_list();
    ##security_query = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
    ##req.addParam("fq", security_query)

    stream = ByteArrayOutputStream()
    self.__indexer.search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    return response.getJsonList("response/docs")
def getAuthorities(self):
    """Return master records whose ``package_node_id`` is this object's id."""
    request = SearchRequest('package_node_id:%s' % self.metadata.get("id"))
    request.setParam("fq", 'recordtype:"master"')
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "9999")

    # Make sure 'fq' has already been set in the session
    ##security_roles = self.authentication.get_roles_list();
    ##security_query = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
    ##req.addParam("fq", security_query)

    stream = ByteArrayOutputStream()
    self.services.getIndexer().search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    return response.getJsonList("response/docs")
def getSearchTerms(self):
    """Return up to 50 matching titles joined with ``", "``.

    Searches for the suggestion prefix (exact or as a wildcard prefix)
    within the current portal, best score first, and takes the first
    ``dc_title`` value of every hit.
    """
    prefix = self.getSuggestionPrefix()
    request = SearchRequest('%(prefix)s OR %(prefix)s*' % { "prefix" : prefix })
    request.addParam("fq", self.page.getPortal().getQuery())
    request.addParam("fq", 'item_type:"object"')
    request.setParam("rows", "50")
    request.setParam("fl", "score,id,dc_title")
    request.setParam("sort", "score desc")

    stream = ByteArrayOutputStream()
    self.services.getIndexer().search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))

    titles = [hit.getList("dc_title").get(0)
              for hit in response.getJsonList("response/docs")]
    return '", "'.join(titles)
def __search(self): recordsPerPage = self.__portal.recordsPerPage query = formData.get("query") if query is None or query == "": query = "*:*" req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", str(recordsPerPage)) req.setParam("facet.field", self.__portal.facetFieldList) req.setParam("facet.sort", "true") req.setParam("facet.limit", str(self.__portal.facetCount)) req.setParam("sort", "f_dc_title asc") # setup facets action = formData.get("action") value = formData.get("value") fq = sessionState.get("fq") if fq is not None: self.__pageNum = 1 req.setParam("fq", fq) if action == "add_fq": self.__pageNum = 1 name = formData.get("name") print " * add_fq: %s" % value req.addParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "remove_fq": self.__pageNum = 1 req.removeParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "clear_fq": self.__pageNum = 1 req.removeParam("fq") elif action == "select-page": self.__pageNum = int(value) req.addParam("fq", 'item_type:"object"') portalQuery = self.__portal.query print " * portalQuery=%s" % portalQuery if portalQuery: req.addParam("fq", portalQuery) self.__selected = req.getParams("fq") sessionState.set("fq", self.__selected) sessionState.set("pageNum", self.__pageNum) req.setParam("start", str((self.__pageNum - 1) * recordsPerPage)) print " * search.py:", req.toString(), self.__pageNum out = ByteArrayOutputStream() Services.indexer.search(req, out) self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) if self.__result is not None: self.__paging = Pagination(self.__pageNum, int(self.__result.get("response/numFound")), self.__portal.recordsPerPage)
def __search(self): recordsPerPage = self.__portal.recordsPerPage query = None if query is None or query == "": query = formData.get("query") if query is None or query == "": query = "*:*" req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", "1000") req.setParam("facet.field", self.__portal.facetFieldList) req.setParam("facet.sort", "true") req.setParam("facet.limit", str(self.__portal.facetCount)) req.setParam("sort", "f_dc_title asc") # setup facets action = formData.get("verb") value = formData.get("value") fq = sessionState.get("fq") if fq is not None: self.__pageNum = 1 req.setParam("fq", fq) if action == "add_fq": self.__pageNum = 1 name = formData.get("name") print " * add_fq: %s" % value req.addParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "remove_fq": self.__pageNum = 1 req.removeParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "clear_fq": self.__pageNum = 1 req.removeParam("fq") elif action == "select-page": self.__pageNum = int(value) req.addParam("fq", 'item_type:"object"') portalQuery = self.__portal.query print " * portalQuery=%s" % portalQuery if portalQuery: req.addParam("fq", portalQuery) self.__selected = req.getParams("fq") sessionState.set("fq", self.__selected) sessionState.set("pageNum", self.__pageNum) req.setParam("start", str((self.__pageNum - 1) * recordsPerPage)) print " * single.py:", req.toString(), self.__pageNum out = ByteArrayOutputStream() Services.indexer.search(req, out) self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) if self.__result is not None: self.__paging = Pagination( self.__pageNum, int(self.__result.get("response/numFound")), self.__portal.recordsPerPage ) print " * single.py: updating manifest..." 
portal = self.getPortal() manifest = portal.getJsonMap("manifest") # add new items from search for doc in self.__result.getList("response/docs"): hashId = md5.new(doc.get("id")).hexdigest() node = portal.get("manifest//node-%s" % hashId) if node is None: portal.set("manifest/node-%s/title" % hashId, doc.get("dc_title").get(0)) portal.set("manifest/node-%s/id" % hashId, doc.get("id")) # remove manifest items missing from search result # print manifest for key in manifest.keySet(): item = manifest.get(key) id = item.get("id") doc = self.__result.getList('response/docs[@id="%s"]' % id) if len(doc) == 0: portal.removePath("manifest//%s" % key) Services.getPortalManager().save(portal)
def __getSolrData(self): prefix = self.getSearchTerms() if prefix != "": query = 'dc_title:"%(prefix)s" OR dc_title:"%(prefix)s*"' % { "prefix" : prefix } else: query = "*:*" portal = self.services.portalManager.get(self.portalId) if portal.searchQuery != "*:*": query = query + " AND " + portal.searchQuery req = SearchRequest(query) req.setParam("fq", 'item_type:"object"') if portal.query: req.addParam("fq", portal.query) req.setParam("fl", "score") req.setParam("sort", "score desc, f_dc_title asc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, str(e))
def __getSolrData(self): prefix = self.getSearchTerms() if prefix: query = '%(prefix)s OR %(prefix)s*' % { "prefix" : prefix } else: query = "*:*" req = SearchRequest(query) req.addParam("fq", 'item_type:"object"') req.setParam("fl", "score") req.setParam("sort", "score desc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, str(e))
def __feed(self): portal = Services.getPortalManager().get(portalId) recordsPerPage = portal.recordsPerPage pageNum = sessionState.get("pageNum", 1) query = "*:*" req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", str(recordsPerPage)) req.setParam("facet.field", portal.facetFieldList) req.setParam("facet.sort", "true") req.setParam("facet.limit", str(portal.facetCount)) req.setParam("sort", "f_dc_title asc") portalQuery = portal.query if portalQuery: req.addParam("fq", portalQuery) else: fq = sessionState.get("fq") req.setParam("fq", fq) req.setParam("start", str((pageNum - 1) * recordsPerPage)) print " * query: ", query print " * portalQuery='%s'" % portalQuery print " * feed.py:", req.toString() out = ByteArrayOutputStream() Services.indexer.search(req, out) self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))
def __search(self):
    """Build ``self.__facetList`` for the requested facet field.

    Combines the form query with the session's saved search query, reuses
    the session filter queries, restricts results to the user's security
    roles, and requests facet counts only (``rows`` is 0).
    """
    query = formData.get("query")
    if query is None or query == "":
        query = "*:*"

    searchQuery = sessionState.get("searchQuery")
    if searchQuery:
        if query == "*:*":
            query = searchQuery
        else:
            query += " AND " + searchQuery

    facetField = formData.get("facet.field")

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("fl", "id")
    req.setParam("rows", "0")
    req.setParam("facet.limit", "-1")
    req.setParam("facet.field", facetField)

    sessionFq = sessionState.get("fq")
    if sessionFq is not None:
        req.setParam("fq", sessionFq)
    req.addParam("fq", 'item_type:"object"')

    # Make sure 'fq' has already been set in the session
    roles = self.authentication.get_roles_list()
    req.addParam("fq", 'security_filter:("' + '" OR "'.join(roles) + '")')

    stream = ByteArrayOutputStream()
    Services.getIndexer().search(req, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    self.__facetList = FacetList(facetField, response)
def __search(self):
    """Search objects and build the ``file_path`` facet list.

    Stores the parsed response in ``self.__result`` and the facet list in
    ``self.__facetList``.
    """
    query = formData.get("query")
    if query is None or query == "":
        query = "*:*"

    request = SearchRequest(query)
    request.setParam("facet", ["true"])
    request.setParam("fl", ["id"])
    request.setParam("fq", ['item_type:"object"'])
    request.setParam("rows", ["100"])
    request.setParam("facet.field", "file_path")

    stream = ByteArrayOutputStream()
    Services.getIndexer().search(request, stream)
    self.__result = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    self.__facetList = FacetList("file_path", self.__result)
def __search(self):
    """Run the OAI search for the current request and manage its token.

    Resolves the optional ``identifier`` form field to an index query,
    pages through the portal using the current resumption token (if any),
    stores the response in ``self.__result`` and records any follow-up
    token in ``self.__resumptionTokenList`` and the session.
    """
    self.log.debug(" === __search()")
    self.__result = SolrResult(None)

    portal = self.services.getPortalManager().get(self.__portalName)
    recordsPerPage = portal.recordsPerPage

    # Resolve our identifier
    identifier = self.vc("formData").get("identifier")
    self.log.debug(" === ID: '{}'", identifier)
    query = "*:*"
    if identifier is not None and identifier != "":
        # A default TF2 OID
        if identifier.startswith("oai:fascinator:"):
            query = "id:" + identifier.replace("oai:fascinator:", "")
        # Or a custom OAI ID
        else:
            query = "oai_identifier:" + identifier.replace(":", "\\:")
    self.log.debug(" === QUERY: '{}'", query)

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", portal.facetFieldList)
    req.setParam("facet.limit", str(portal.facetCount))
    req.setParam("sort", "f_dc_title asc")

    portalQuery = portal.query
    self.log.debug(" * portalQuery={}", portalQuery)
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", "item_type:object")

    # Check if there's resumption token exist in the formData
    if self.__currentToken:
        start = self.__currentToken.getStart()
        totalFound = self.__currentToken.getTotalFound()
        nextTokenStart = start + recordsPerPage
        if nextTokenStart < totalFound:
            self.__token = ResumptionToken(start = nextTokenStart,
                                           metadataPrefix = self.__metadataPrefix,
                                           sessionExpiry = self.__sessionExpiry)
    else:
        start = 0
        self.__token = ResumptionToken(start = recordsPerPage,
                                       metadataPrefix = self.__metadataPrefix,
                                       sessionExpiry = self.__sessionExpiry)

    req.setParam("start", str(start))

    # The '{}' placeholder is required, otherwise the argument is dropped
    self.log.debug(" * oai.py: {}", req.toString())

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    # Drop the token when nothing (more) is left to page through
    totalFound = self.__result.getNumFound()
    if totalFound == 0:
        self.__token = None
    elif self.__token:
        if self.__token.getStart() < totalFound:
            self.__token.setTotalFound(totalFound)
        else:
            self.__token = None

    # Storing the resumptionToken to session
    if self.__token:
        self.__resumptionTokenList[self.__token.getToken()] = self.__token #(totalFound, self.__token.getConstructedToken())
        # Need to know how long the server need to store this token
        self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)
def __search(self):
    """Run the OAI search and maintain the stored resumption token.

    Builds the query from the optional ``identifier`` form field, filters
    by portal and by the request's from/until dates, and pages using
    ``self.__currentToken``. Sets a request error when nothing matches,
    stores or updates the follow-up token in ``self.tokensDB``, and leaves
    the token for the next page (or None) in ``self.__currentToken``.
    """
    self.__result = SolrResult(None)

    portal = self.services.getPortalManager().get(self.__portalName)
    recordsPerPage = portal.recordsPerPage

    # Resolve our identifier
    id = self.vc("formData").get("identifier")
    hasId = id is not None and id != ""
    query = "*:*"
    if hasId:
        # A default TF2 OID
        if id.startswith("oai:fascinator.usq.edu.au:"):
            idString = id.replace("oai:fascinator.usq.edu.au:", "")
            query = "id:" + self.__escapeQuery(idString)
        # Or a custom OAI ID
        else:
            query = "oai_identifier:" + self.__escapeQuery(id)

    req = SearchRequest(query)
    req.setParam("facet", "true")
    req.setParam("rows", str(recordsPerPage))
    req.setParam("facet.field", portal.facetFieldList)
    req.setParam("facet.limit", str(portal.facetCount))
    req.setParam("sort", "f_dc_title asc")

    portalQuery = portal.query
    if portalQuery:
        req.addParam("fq", portalQuery)
    req.addParam("fq", "item_type:object")

    # Date data... is supplied
    fromDate = self.__request.getFromDate()
    untilDate = self.__request.getUntilDate()
    if fromDate is not None:
        fromStr = fromDate.isoformat() + "Z"
        self.log.debug("From Date: '{}'", fromStr)
        if untilDate is None:
            queryStr = "last_modified:[%s TO *]" % (fromStr)
        else:
            untilStr = untilDate.isoformat() + "Z"
            self.log.debug("Until Date: '{}'", untilStr)
            queryStr = "last_modified:[%s TO %s]" % (fromStr, untilStr)
        self.log.debug("Date query: '{}'", queryStr)
        req.addParam("fq", queryStr)
    elif untilDate is not None:
        untilStr = untilDate.isoformat() + "Z"
        self.log.debug("Until Date: '{}'", untilDate.isoformat())
        queryStr = "last_modified:[* TO %s]" % (untilStr)
        self.log.debug("Date query: '{}'", queryStr)
        req.addParam("fq", queryStr)

    # Check if there's resumption token exist in the formData
    newToken = None
    if self.__currentToken is None:
        # Start a new resumption token
        start = 0
        newToken = ResumptionToken(None, recordsPerPage,
                                   self.__metadataPrefix, self.__sessionExpiry)
    else:
        start = int(self.__currentToken.getStart())
        totalFound = int(self.__currentToken.getTotalFound())
        nextTokenStart = start + recordsPerPage
        if nextTokenStart < totalFound:
            newToken = self.__currentToken
            newToken.resetExpiry(self.__sessionExpiry)
            newToken.setStart(nextTokenStart)

    req.setParam("start", str(start))

    out = ByteArrayOutputStream()
    self.services.indexer.search(req, out)
    self.__result = SolrResult(ByteArrayInputStream(out.toByteArray()))

    totalFound = self.__result.getNumFound()
    if totalFound == 0:
        newToken = None
        # If an ID was requested, and not found, this is an error
        if hasId:
            self.__request.setError("idDoesNotExist", "ID: '%s' not found" % id)
        else:
            self.__request.setError("noRecordsMatch", "No records match this request")
    elif self.__currentToken is None:
        # We need to store this for NEW tokens, assuming there are
        # enough results to even keep the token
        if newToken.getStart() < totalFound:
            newToken.setTotalFound(totalFound)
        else:
            newToken = None
    elif self.__result.getResults().size() < recordsPerPage:
        # Check if we need to remove the resumption token
        self.tokensDB.removeToken(self.__currentToken)

    # Store/update the resumption token
    if newToken is not None:
        if self.__currentToken is None:
            # Brand new token
            self.tokensDB.storeToken(newToken)
        else:
            # Or update an old token
            self.tokensDB.updateToken(newToken)

    self.__currentToken = newToken
def __search(self): requireEscape = False recordsPerPage = self.__portal.recordsPerPage uri = URLDecoder.decode(self.request.getAttribute("RequestURI")) query = None pagePath = self.__portal.getName() + "/" + self.pageName if query is None or query == "": query = self.formData.get("query") requireEscape = True if query is None or query == "": query = "*:*" if query == "*:*": self.__query = "" else: self.__query = query if requireEscape: query = self.__escapeQuery(query) query = "%s:%s" % (self.__searchField, query) self.sessionState.set("query", self.__query) # find objects with annotations matching the query if query != "*:*": anotarQuery = self.__query if requireEscape: anotarQuery = self.__escapeQuery(anotarQuery) annoReq = SearchRequest(anotarQuery) annoReq.setParam("facet", "false") annoReq.setParam("rows", str(99999)) annoReq.setParam("sort", "dateCreated asc") annoReq.setParam("start", str(0)) anotarOut = ByteArrayOutputStream() self.services.indexer.annotateSearch(annoReq, anotarOut) resultForAnotar = SolrResult(ByteArrayInputStream(anotarOut.toByteArray())) resultForAnotar = resultForAnotar.getResults() ids = HashSet() for annoDoc in resultForAnotar: annotatesUri = annoDoc.get("annotatesUri") ids.add(annotatesUri) print "Found annotation for %s" % annotatesUri # add annotation ids to query query += ' OR id:("' + '" OR "'.join(ids) + '")' portalSearchQuery = self.__portal.searchQuery if portalSearchQuery == "": portalSearchQuery = query else: if query != "*:*": query += " AND " + portalSearchQuery else: query = portalSearchQuery req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", str(recordsPerPage)) req.setParam("facet.field", self.__portal.facetFieldList) req.setParam("facet.sort", Boolean.toString(self.__portal.getFacetSort())) req.setParam("facet.limit", str(self.__portal.facetCount)) req.setParam("sort", self.__sortBy) # setup facets if self.__useSessionNavigation: action = self.formData.get("verb") value = 
self.formData.get("value") fq = self.sessionState.get("fq") if fq is not None: self.__pageNum = 1 req.setParam("fq", fq) if action == "add_fq": self.__pageNum = 1 req.addParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "remove_fq": self.__pageNum = 1 req.removeParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "clear_fq": self.__pageNum = 1 req.removeParam("fq") elif action == "select-page": self.__pageNum = int(value) else: navUri = uri[len(pagePath):] self.__pageNum, fq, self.__fqParts = self.__parseUri(navUri) savedfq = self.sessionState.get("savedfq") limits = [] if savedfq: limits.extend(savedfq) if fq: limits.extend(fq) self.sessionState.set("savedfq", limits) for q in fq: req.addParam("fq", URLDecoder.decode(q, "UTF-8")) portalQuery = self.__portal.query if portalQuery: req.addParam("fq", portalQuery) req.addParam("fq", 'item_type:"object"') if req.getParams("fq"): self.__selected = ArrayList(req.getParams("fq")) if self.__useSessionNavigation: self.sessionState.set("fq", self.__selected) self.sessionState.set("searchQuery", portalSearchQuery) self.sessionState.set("pageNum", self.__pageNum) # Make sure 'fq' has already been set in the session if not self.page.authentication.is_admin(): current_user = self.page.authentication.get_username() security_roles = self.page.authentication.get_roles_list() security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")' security_exceptions = 'security_exception:"' + current_user + '"' owner_query = 'owner:"' + current_user + '"' security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")" req.addParam("fq", security_query) req.setParam("start", str((self.__pageNum - 1) * recordsPerPage)) print " * search.py:", req.toString(), self.__pageNum out = ByteArrayOutputStream() self.services.indexer.search(req, out) self.__result = SolrResult(ByteArrayInputStream(out.toByteArray())) if self.__result is not None: self.__paging = 
Pagination(self.__pageNum, int(self.__result.getNumFound()), self.__portal.recordsPerPage)
def numberOfModifiedRecord(self):
    """Return ``response/numFound`` for ``modified:true`` objects in the portal.

    The search is filtered by the portal's query and search query when set.
    Non-admin users are further restricted to records they may see through
    their roles, a security exception, or ownership. The parsed response is
    kept in ``self.__result``.
    """
    indexer = self.services.getIndexer()
    # Look the portal up once and read both queries from it
    portal = self.services.getPortalManager().get(self.portal.getName())
    portalQuery = portal.getQuery()
    portalSearchQuery = portal.getSearchQuery()

    req = SearchRequest("modified:true")
    req.setParam("fq", 'item_type:"object"')
    if portalQuery:
        req.addParam("fq", portalQuery)
    if portalSearchQuery:
        req.addParam("fq", portalSearchQuery)
    # Only the count is needed, so ask for no documents
    req.setParam("rows", "0")

    auth = self.page.authentication
    if not auth.is_admin():
        # Security prep work is only needed when the filter is applied
        current_user = auth.get_username()
        security_roles = auth.get_roles_list()
        security_filter = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
        security_exceptions = 'security_exception:"' + current_user + '"'
        owner_query = 'owner:"' + current_user + '"'
        security_query = "(" + security_filter + ") OR (" + security_exceptions + ") OR (" + owner_query + ")"
        req.addParam("fq", security_query)

    out = ByteArrayOutputStream()
    indexer.search(req, out)
    self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))
    return self.__result.get("response/numFound")
def __getSolrData(self): prefix = self.getSearchTerms() print "prefix='%s'" % prefix if prefix: query = "dc_title:%(prefix)s OR dc_title:%(prefix)s*" % {"prefix": prefix} query += " OR f_dc_identifier:%(ns)s%(prefix)s OR f_dc_identifier:%(ns)s%(prefix)s*" % { "prefix": prefix, "ns": "http\://example.com/arc/", } else: query = "*:*" portal = self.services.portalManager.get(self.portalId) if portal.searchQuery != "*:*" and portal.searchQuery != "": query = query + " AND " + portal.searchQuery req = SearchRequest(query) req.setParam("fq", 'item_type:"object"') if portal.query: req.addParam("fq", portal.query) req.setParam("fl", "score") req.setParam("sort", "score desc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) req.setParam("facet", "true") req.setParam("facet.field", "repository_name") req.setParam("facet.mincount", "1") ns = self.getNamespace() level = self.getFormData("level", None) if level and level != "top": req.addParam("fq", 'repository_name:"%s"' % level.replace(ns, "")) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) results = JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) if level == "top": narrowerMap = {} for doc in results.getJsonList("response/docs"): value = doc.getList("repository_name").get(0) hash = md5.md5(value).hexdigest() if not narrowerMap.has_key(hash): # print value, hash narrowerMap[hash] = [] narrowerMap[hash].append(doc.get("id")) docs = ArrayList() facets = results.getList("facet_counts/facet_fields/repository_name") for i in range(0, len(facets), 2): value = facets[i] hash = md5.md5(value).hexdigest() # print value,hash doc = JsonConfigHelper() doc.set("score", "1") doc.set("dc_identifier", "%s%s" % (ns, value)) doc.set("skos_inScheme", ns) doc.set("skos_broader", "%s%s" % (ns, value)) doc.set("skos_narrower", '", "'.join(narrowerMap[hash])) doc.set("skos_prefLabel", value) docs.add(doc) results.removePath("response/docs") 
results.setJsonList("response/docs", docs) return results except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, str(e))
def __search(self):
    """Build ``self.__facetList`` for the requested facet field.

    Uses the form query (default ``*:*``) with the session's filter
    queries and requests facet counts only (``rows`` is 0).
    """
    query = formData.get("query")
    if query is None or query == "":
        query = "*:*"
    facetField = formData.get("facet.field")

    request = SearchRequest(query)
    request.setParam("facet", "true")
    request.setParam("fl", "id")
    request.setParam("rows", "0")
    request.setParam("facet.limit", "-1")
    request.setParam("facet.field", facetField)

    sessionFq = sessionState.get("fq")
    if sessionFq is not None:
        request.setParam("fq", sessionFq)
    request.addParam("fq", 'item_type:"object"')

    stream = ByteArrayOutputStream()
    Services.getIndexer().search(request, stream)
    response = JsonConfigHelper(ByteArrayInputStream(stream.toByteArray()))
    self.__facetList = FacetList(facetField, response)
def __search(self): recordsPerPage = self.__portal.recordsPerPage uri = URLDecoder.decode(request.getAttribute("RequestURI")) query = None pagePath = portalId + "/" + pageName if uri != pagePath: query = uri[len(pagePath)+1:] if query is None or query == "": query = formData.get("query") if query is None or query == "": query = "*:*" if query == "*:*": self.__query = "" else: self.__query = query sessionState.set("query", self.__query) # find objects with annotations matching the query if query != "*:*": anotarQuery = self.__query annoReq = SearchRequest(anotarQuery) annoReq.setParam("facet", "false") annoReq.setParam("rows", str(99999)) annoReq.setParam("sort", "dateCreated asc") annoReq.setParam("start", str(0)) anotarOut = ByteArrayOutputStream() Services.indexer.annotateSearch(annoReq, anotarOut) resultForAnotar = JsonConfigHelper(ByteArrayInputStream(anotarOut.toByteArray())) resultForAnotar = resultForAnotar.getJsonList("response/docs") ids = HashSet() for annoDoc in resultForAnotar: annotatesUri = annoDoc.get("annotatesUri") ids.add(annotatesUri) print "Found annotation for %s" % annotatesUri # add annotation ids to query query += ' OR id:("' + '" OR "'.join(ids) + '")' portalSearchQuery = self.__portal.searchQuery if portalSearchQuery == "": portalSearchQuery = query else: if query != "*:*": query += " AND " + portalSearchQuery else: query = portalSearchQuery req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", str(recordsPerPage)) req.setParam("facet.field", self.__portal.facetFieldList) req.setParam("facet.sort", "true") req.setParam("facet.limit", str(self.__portal.facetCount)) req.setParam("sort", "f_dc_title asc") # setup facets action = formData.get("verb") value = formData.get("value") fq = sessionState.get("fq") if fq is not None: self.__pageNum = 1 req.setParam("fq", fq) if action == "add_fq": self.__pageNum = 1 name = formData.get("name") print " * add_fq: %s" % value req.addParam("fq", URLDecoder.decode(value, "UTF-8")) 
elif action == "remove_fq": self.__pageNum = 1 req.removeParam("fq", URLDecoder.decode(value, "UTF-8")) elif action == "clear_fq": self.__pageNum = 1 req.removeParam("fq") elif action == "select-page": self.__pageNum = int(value) req.addParam("fq", 'item_type:"object"') portalQuery = self.__portal.query print " * portalQuery=%s" % portalQuery if portalQuery: req.addParam("fq", portalQuery) self.__selected = list(req.getParams("fq")) sessionState.set("fq", self.__selected) sessionState.set("searchQuery", portalSearchQuery) sessionState.set("pageNum", self.__pageNum) # Make sure 'fq' has already been set in the session if not page.authentication.is_admin(): security_roles = page.authentication.get_roles_list() security_query = 'security_filter:("' + '" OR "'.join(security_roles) + '")' current_user = page.authentication.get_username() owner_query = 'owner:"' + current_user + '"' req.addParam("fq", "(" + security_query + ") OR (" + owner_query + ")") req.setParam("start", str((self.__pageNum - 1) * recordsPerPage)) print " * search.py:", req.toString(), self.__pageNum out = ByteArrayOutputStream() Services.indexer.search(req, out) self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) if self.__result is not None: self.__paging = Pagination(self.__pageNum, int(self.__result.get("response/numFound")), self.__portal.recordsPerPage)
def __search(self): self.__result = SolrResult(None) portal = self.services.getPortalManager().get(self.vc("portalId")) recordsPerPage = portal.recordsPerPage query = self.vc("formData").get("query") if query is None or query == "": query = "*:*" req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", str(recordsPerPage)) req.setParam("facet.field", portal.facetFieldList) req.setParam("facet.limit", str(portal.facetCount)) req.setParam("sort", "f_dc_title asc") portalQuery = portal.query print " * portalQuery=%s" % portalQuery if portalQuery: req.addParam("fq", portalQuery) req.addParam("fq", "item_type:object") #Check if there's resumption token exist in the formData if self.__currentToken: start = self.__currentToken.getStart() totalFound = self.__currentToken.getTotalFound() nextTokenStart = start+recordsPerPage if nextTokenStart < totalFound: self.__token = ResumptionToken(start=nextTokenStart, metadataPrefix=self.__metadataPrefix, sessionExpiry=self.__sessionExpiry) else: start = 0 metadataPrefix = self.vc("formData").get("metadataPrefix") self.__token = ResumptionToken(start=recordsPerPage, metadataPrefix=self.__metadataPrefix, sessionExpiry=self.__sessionExpiry) req.setParam("start", str(start)) print " * oai.py:", req.toString() out = ByteArrayOutputStream() self.services.indexer.search(req, out) self.__result = SolrResult(ByteArrayInputStream(out.toByteArray())) totalFound = self.__result.getNumFound() if totalFound == 0: self.__token = None elif self.__token: if self.__token.getStart() < totalFound: self.__token.setTotalFound(totalFound) else: self.__token = None #Storing the resumptionToken to session if self.__token: self.__resumptionTokenList[self.__token.getToken()] = self.__token #(totalFound, self.__token.getConstructedToken()) #Need to know how long the server need to store this token self.sessionState.set("resumptionTokenList", self.__resumptionTokenList)
def __search(self):
    """Run the four searches whose results this page keeps.

    Every search is filtered to item_type "object", to the portal's query
    and search query (when set) and, for non-admin users, to records the
    user may see through a security role or ownership. The results are
    stored as:

    - self.__latest:    records modified within the last month (10 rows)
    - self.__mine:      records owned by the current user (10 rows)
    - self.__workflows: records whose workflow_security names the current
                        user (10 rows)
    - self.__result:    a match-all search returning 0 rows

    The session "fq" is set to 'item_type:"object"'. Reads the free names
    Services, portalId, page and sessionState.
    """
    indexer = Services.getIndexer()
    portal = Services.getPortalManager().get(portalId)
    portalQuery = portal.getQuery()
    portalSearchQuery = portal.getSearchQuery()

    # Security prep work
    current_user = page.authentication.get_username()
    security_roles = page.authentication.get_roles_list()
    security_query = 'security_filter:("' + '" OR "'.join(security_roles) + '")'
    owner_query = 'owner:"' + current_user + '"'
    security_filter = "(" + security_query + ") OR (" + owner_query + ")"
    is_admin = page.authentication.is_admin()

    def runSearch(query, rows, sort=None):
        # Build one filtered request, run it and return the parsed response.
        req = SearchRequest(query)
        req.setParam("fq", 'item_type:"object"')
        if portalQuery:
            req.addParam("fq", portalQuery)
        if portalSearchQuery:
            req.addParam("fq", portalSearchQuery)
        req.setParam("rows", rows)
        if sort is not None:
            req.setParam("sort", sort)
        if not is_admin:
            req.addParam("fq", security_filter)
        out = ByteArrayOutputStream()
        indexer.search(req, out)
        return JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))

    recentFirst = "last_modified desc, f_dc_title asc"
    self.__latest = runSearch("last_modified:[NOW-1MONTH TO *]", "10", recentFirst)
    self.__mine = runSearch(owner_query, "10", recentFirst)
    self.__workflows = runSearch('workflow_security:"' + current_user + '"',
                                 "10", recentFirst)

    # rows=0: no documents are requested from this search
    self.__result = runSearch("*:*", "0")
    sessionState.set("fq", 'item_type:"object"')
def __search(self): self.__result = JsonConfigHelper() portal = Services.getPortalManager().get(portalId) recordsPerPage = portal.recordsPerPage query = formData.get("query") if query is None or query == "": query = "*:*" req = SearchRequest(query) req.setParam("facet", "true") req.setParam("rows", str(recordsPerPage)) req.setParam("facet.field", portal.facetFieldList) req.setParam("facet.limit", str(portal.facetCount)) req.setParam("sort", "f_dc_title asc") portalQuery = portal.query print " * portalQuery=%s" % portalQuery if portalQuery: req.addParam("fq", portalQuery) # TODO resumptionToken #req.setParam("start", str((self.__pageNum - 1) * recordsPerPage)) print " * oai.py:", req.toString() out = ByteArrayOutputStream() Services.indexer.search(req, out) self.__result = JsonConfigHelper(ByteArrayInputStream(out.toByteArray()))
def __getSolrData(self): prefix = self.getSearchTerms() if prefix: query = '%(prefix)s OR %(prefix)s*' % { "prefix" : prefix } else: query = "*:*" level = self.getFormData("level", None) if level is not None: if level=="top": #query += " AND skos_hasTopConcept:http*" query += ' AND dc_identifier:"http://purl.org/anzsrc/seo/#division"' else: query += ' AND skos_broader:"%s"' % level req = SearchRequest(query) req.addParam("fq", 'item_type:"object"') req.addParam("fq", 'repository_type:"SEO"') req.setParam("fl", "score") req.setParam("sort", "score desc") req.setParam("start", self.getStartIndex()) req.setParam("rows", self.getItemsPerPage()) try: out = ByteArrayOutputStream() indexer = self.services.getIndexer() indexer.search(req, out) return JsonConfigHelper(ByteArrayInputStream(out.toByteArray())) except Exception, e: self.log.error("Failed to lookup '{}': {}", prefix, str(e))
def getSuggestedNames(self):
    """Search for author records whose title resembles this record's name.

    Common written forms of the name (built from the first "surname",
    "firstName" and, when present, "secondName" metadata values) are
    searched as boosted phrases, and each word of the package title is
    searched as a lower-weighted term. Only recordtype "author" /
    item_type "object" documents are considered.

    Returns a LinkedHashMap from author title to an ArrayList of the
    matching documents, in the order the index returned them. Also sets
    self.__maxScore to the response's maxScore, with a floor of 1.0.
    """
    meta = self.__metadata
    surname = meta.getList("surname").get(0)
    firstName = meta.getList("firstName").get(0)
    firstInitial = firstName[0].upper()

    # secondName stays the (empty) list when no second name was supplied
    secondName = meta.getList("secondName")
    if not secondName.isEmpty():
        secondName = secondName.get(0)

    # search common forms
    lookupNames = []
    if secondName and secondName != "":
        secondInitial = secondName[0].upper()
        lookupNames.extend([
            "%s, %s. %s." % (surname, firstInitial, secondInitial),
            "%s, %s %s." % (surname, firstName, secondInitial),
            "%s, %s %s" % (surname, firstName, secondName),
            "%s %s %s" % (firstName, secondName, surname),
        ])
    lookupNames.extend([
        "%s, %s." % (surname, firstInitial),
        "%s, %s" % (surname, firstName),
        "%s %s" % (firstName, surname),
    ])
    phraseQuery = '" OR dc_title:"'.join(lookupNames)

    # general word search from each part of the name
    words = [word for word in self.getPackageTitle().split(" ") if len(word) > 0]
    wordQuery = " OR dc_title:".join(words)

    req = SearchRequest('(dc_title:"%s")^2.5 OR (dc_title:%s)^0.5' % (phraseQuery, wordQuery))
    self.log.info("suggestedNames query={}", req.query)
    req.setParam("fq", 'recordtype:"author"')
    req.addParam("fq", 'item_type:"object"')
    req.setParam("rows", "9999")
    req.setParam("fl", "score")
    req.setParam("sort", "score desc")
    # No security_filter fq is added to this request.

    responseBytes = ByteArrayOutputStream()
    self.services.getIndexer().search(req, responseBytes)
    result = JsonConfigHelper(ByteArrayInputStream(responseBytes.toByteArray()))

    # group the documents by their first dc_title value
    suggestions = LinkedHashMap()
    for doc in result.getJsonList("response/docs"):
        authorName = doc.getList("dc_title").get(0)
        authorDocs = suggestions.get(authorName)
        if authorDocs is None:
            authorDocs = ArrayList()
            suggestions.put(authorName, authorDocs)
        authorDocs.add(doc)

    self.__maxScore = max(1.0, float(result.get("response/maxScore")))

    return suggestions