def _rewriteQuery(self, session, query):
    """Rewrite multi-word search clauses into one clause per word.

    A leaf clause (an object without a ``leftOperand`` attribute) whose
    relation is ``all``, ``any`` or ``=`` is split on spaces and re-expressed
    as single-word clauses joined by ``and``, ``or`` or ``prox``
    respectively.  The joined CQL string is parsed and the result returned.

    A boolean node is handled by recursing into both operands; an operand is
    replaced in place whenever the recursive call produced a rewrite.

    Returns:
        The newly parsed query for a rewritable leaf clause.  ``None`` when
        the clause cannot be rewritten (unsupported relation, single-word
        term), and always ``None`` for boolean nodes, which are modified in
        place instead.
    """
    if hasattr(query, 'leftOperand'):
        # Boolean node: rewrite each side in place; nothing to return.
        for side in ('leftOperand', 'rightOperand'):
            rewritten = self._rewriteQuery(session, getattr(query, side))
            if rewritten:
                setattr(query, side, rewritten)
        return None

    relation = query.relation.value
    if relation == "all":
        # Rewrite to AND triples
        nbool = " and "
    elif relation == "any":
        nbool = " or "
    elif (relation == "=" and
          not query.term.value.isnumeric() and
          ' ' in query.term.value):
        # Membership test instead of str.index(): index() raises ValueError
        # for a term without a space rather than returning -1.
        nbool = " prox "
    else:
        # Can't rewrite
        return None

    # Now split on spaces
    terms = query.term.value.split(' ')
    if len(terms) == 1:
        return None
    index_cql = query.index.toCQL()
    relation_cql = query.relation.toCQL()
    clauses = [' '.join([index_cql, relation_cql, '"' + t + '"'])
               for t in terms]
    return cql.parse(nbool.join(clauses))
def value_of(elem):
    """Deserialize the value held by ``elem`` according to its ``t`` attribute.

    ``elem.attrib['t']`` selects the conversion applied to the unescaped
    element text:

    * ``pickle`` -- the text is unpickled.
    * ``None``   -- ``None`` is returned.
    * ``object`` -- the text is used as an identifier and looked up through
      the database object obtained from ``session``.
    * ``cql``    -- the text is parsed with ``cqlParser.parse``.
    * a key of ``dsrlz_typehash`` -- the mapped callable is applied.
    * anything else -- the unescaped text is returned unchanged.

    ``session`` is not a parameter; it is resolved from the surrounding
    scope.  Parse errors from ``cqlParser.parse`` propagate to the caller.
    """
    t = elem.attrib['t']
    # An element without text content reports ``text`` as None; treat that
    # as an empty string so unescape() does not fail on it.
    raw = elem.text
    txt = unescape(raw if raw is not None else '')
    if t == 'pickle':
        # SECURITY NOTE(review): pickle.loads executes arbitrary code when
        # fed crafted input.  Only safe if the serialized data is trusted.
        return pickle.loads(txt.encode('utf-8'))
    if t == 'None':
        return None
    if t == 'object':
        # Dereference id
        db = session.server.get_object(session, session.database)
        return db.get_object(session, txt)
    if t == 'cql':
        return cqlParser.parse(txt)
    if t in dsrlz_typehash:
        converter = dsrlz_typehash[t]
        if isinstance(txt, unicode) and t != 'unicode':
            # Non-unicode target types are fed UTF-8 encoded bytes.
            return converter(txt.encode('utf-8'))
        return converter(txt)
    return txt
def _rewriteQuery(self, session, query):
    """Split multi-word leaf clauses into per-word clauses.

    For a leaf clause (no ``leftOperand`` attribute) the relation decides
    the joining boolean: ``all`` -> ``and``, ``any`` -> ``or``, and ``=``
    (for a non-numeric term containing a space) -> ``prox``.  The term is
    split on spaces, one ``index relation "word"`` clause is built per word,
    and the joined string is parsed with ``cql.parse``.

    For a boolean node both operands are processed recursively and replaced
    in place when a rewrite is available.

    Returns:
        The parsed replacement query for a rewritable leaf; otherwise
        ``None`` (including for every boolean node).
    """
    if hasattr(query, 'leftOperand'):
        # Boolean node: operands are swapped in place, nothing is returned.
        n = self._rewriteQuery(session, query.leftOperand)
        if n:
            query.leftOperand = n
        n = self._rewriteQuery(session, query.rightOperand)
        if n:
            query.rightOperand = n
        return None

    rel_value = query.relation.value
    if rel_value == "all":
        # Rewrite to AND triples
        nbool = " and "
    elif rel_value == "any":
        nbool = " or "
    elif (rel_value == "=" and
          not query.term.value.isnumeric() and
          query.term.value.find(' ') > -1):
        # find() returns -1 when there is no space; the previous index()
        # call raised ValueError instead, so single-word '=' clauses crashed.
        nbool = " prox "
    else:
        # Can't rewrite
        return None

    # Now split on spaces
    words = query.term.value.split(' ')
    if len(words) == 1:
        return None
    nq = []
    for word in words:
        nq.append(' '.join([
            query.index.toCQL(),
            query.relation.toCQL(),
            '"' + word + '"',
        ]))
    return cql.parse(nbool.join(nq))
def parse(self, session, data, codec, db):
    """Build a CQL query string from a search form and parse it.

    The form is expected to carry numbered fields ``fieldcont1``,
    ``fieldidx1``, ``fieldrel1``, ``fieldbool1``, ``fieldcont2`` ... Each
    ``fieldcontN`` yields one parenthesised clause; ``fieldidxN`` may name
    several indexes separated by ``||`` which are OR-ed together, and
    ``fieldboolN`` is the boolean placed between clause N and clause N+1.

    Quoted phrases in the content (as matched by ``self.phraseRe``) become
    separate ``=/relevant/proxinfo`` clauses unless the relation is an
    exact / ``=`` / ``/string`` style relation, in which case double quotes
    are escaped instead.

    Args:
        session: unused here.
        data: form object supporting ``in`` and ``getfirst(name, default)``.
        codec: unused here.
        db: unused here.

    Returns:
        The result of ``cql.parse`` on the assembled query string.
    """
    form = data

    # Booleans are indexed so that bools[N] is the operator following
    # clause N (bools[0] is a placeholder read from 'fieldbool0').
    bools = []
    i = 1
    while "fieldcont{0}".format(i) in form:
        bools.append(form.getfirst("fieldbool{0}".format(i - 1),
                                   "and/relevant/proxinfo"))
        i += 1

    qClauses = []
    i = 1
    while "fieldcont{0}".format(i) in form:
        cont = form.getfirst("fieldcont{0}".format(i))
        idxs = unquote(form.getfirst("fieldidx{0}".format(i),
                                     "cql.anywhere"))
        rel = unquote(form.getfirst("fieldrel{0}".format(i),
                                    "all/relevant/proxinfo"))

        # Don't allow phrase searching for exact or /string searches:
        # escape the quotes so they are treated as literal characters.
        if rel.startswith(("exact", "=")) or "/string" in rel:
            cont = cont.replace('"', '\\"')

        # Extract phrases ONCE per field.  Doing this inside the index loop
        # stripped the phrases from ``cont`` on the first pass, so second
        # and later indexes silently lost their phrase clauses.
        phrases = []
        if not ("exact" in rel or "=" in rel or "/string" in rel):
            phrases = self.phraseRe.findall(cont)
            cont = self.phraseRe.sub("", cont)
        # Whitespace left over after removing phrases is not a search term.
        remainder = cont.strip()

        if rel.startswith("all"):
            subBool = " and/relevant/proxinfo "
        else:
            subBool = " or/relevant/proxinfo "

        idxClauses = []
        for idx in idxs.split("||"):
            subClauses = ["({0} =/relevant/proxinfo {1})".format(idx, ph)
                          for ph in phrases]
            if idx and rel and remainder:
                subClauses.append("{0} {1} {2}".format(idx, rel, remainder))
            if subClauses:
                idxClauses.append("({0})".format(subBool.join(subClauses)))

        qClauses.append("({0})".format(
            " or/rel.combine=sum/proxinfo ".join(idxClauses)))

        # Append the boolean only if there is another clause to follow.
        if i >= len(bools):
            break
        qClauses.append(bools[i])
        i += 1

    qString = " ".join(qClauses)
    formcodec = form.getfirst("_charset_", "utf-8")
    return cql.parse(qString.decode(formcodec).encode("utf-8"))
def fetch_query(self, session, id):
    """Load the stored query text for ``id`` and return it parsed.

    The text returned by ``self.fetch_data`` is parsed with
    ``cqlParser.parse`` and the resulting object is tagged with ``id``.
    If a companion ``__rset_<id>`` entry exists, its value is stored on
    ``self.resultSetId``; a missing entry is silently ignored.
    """
    stored_text = self.fetch_data(session, id)
    parsed = cqlParser.parse(stored_text)
    parsed.id = id
    try:
        result_set_id = self.fetch_data(session, "__rset_%s" % id)
    except ObjectDoesNotExistException:
        # No result set recorded for this query.
        return parsed
    self.resultSetId = result_set_id
    return parsed
def process_scan(self, opts, result):
    """Handle a scan request and append the resulting terms to ``result``.

    ``opts`` must contain ``scanClause`` (a CQL string); ``maximumTerms``
    defaults to 20 and ``responsePosition`` to 0.  The parsed clause is
    echoed back into ``opts['xQuery']`` as an XML element.

    ``responsePosition`` (rp) selects where the scan term sits in the list:
    0 -> terms after it, 1 -> terms from it, maximumTerms -> terms up to
    it, maximumTerms + 1 -> terms before it; anything in between combines
    a backward and a forward scan.

    ``session`` is not a parameter; it is resolved from the enclosing scope.

    Raises:
        Diagnostic 7 when ``scanClause`` is missing, diagnostic 120 when
        ``responsePosition`` is out of range.

    Returns:
        ``result`` with a terms element appended.
    """
    db = session.config.parent
    session.database = db.id
    if 'scanClause' not in opts:
        raise self.diagnostic(7,
                              msg="Mandatory parameter not supplied",
                              details='scanClause')
    q = cqlParser.parse(opts['scanClause'])
    opts['xQuery'] = etree.XML(q.toXCQL())

    mt = opts.get('maximumTerms', 20)
    rp = opts.get('responsePosition', 0)
    if rp < 0 or rp > (mt + 1):
        raise self.diagnostic(120,
                              msg="Response position out of range",
                              details=str(rp))

    if not q.term.value:
        # Empty term: start from the lowest possible character.
        q.term.value = chr(0)
    q.config = session.config

    if rp == 1:
        data = db.scan(session, q, mt, direction=">=")
    elif rp == 0:
        data = db.scan(session, q, mt, direction=">")
    elif rp == mt:
        data = db.scan(session, q, mt, direction="<=")
        data.reverse()
    elif rp == mt + 1:
        data = db.scan(session, q, mt, direction="<")
        data.reverse()
    else:
        # Need to go up and down
        data1 = db.scan(session, q, mt - rp + 1, direction=">=")
        data = db.scan(session, q, rp, direction="<=")
        # Both scans include the scan term itself when it exists; drop the
        # duplicate.  Guard against either scan coming back empty, which
        # previously raised IndexError.
        if data1 and data and data1[0][0] == data[0][0]:
            data = data[1:]
        data.reverse()
        data.extend(data1)

    terms = elemFac.terms()
    for d in data:
        t = self.term(value=d[0], num=d[1][1])
        self.extraData('term', opts, t, d)
        terms.append(t)
    result.append(terms)
    return result
def fetch_query(self, session, id):
    """Fetch query data, parse it into a query object and return it.

    The stored CQL text is read with ``self.fetch_data`` and parsed with
    ``cqlParser.parse``; the parsed object is tagged with ``id``.  If a
    companion ``__rset_<id>`` entry exists its value is stored on
    ``self.resultSetId``; a missing entry is ignored.

    Raises:
        ObjectDeletedException: the store returned a ``DeletedObject``.
        ObjectDoesNotExistException: the store returned nothing usable.
    """
    data = self.fetch_data(session, id)
    # Check for a deletion marker first: testing truthiness first would
    # send a truthy DeletedObject straight to the parser and make the
    # ObjectDeletedException branch unreachable.
    if isinstance(data, DeletedObject):
        raise ObjectDeletedException(data)
    if not data:
        raise ObjectDoesNotExistException(id)

    q = cqlParser.parse(data)
    q.id = id
    try:
        rsid = self.fetch_data(session, "__rset_%s" % id)
    except ObjectDoesNotExistException:
        # No result set recorded for this query.
        pass
    else:
        self.resultSetId = rsid
    return q
def process_scan(self, opts, result):
    """Run a scan for ``opts['scanClause']`` and append terms to ``result``.

    Reads ``maximumTerms`` (default 20) and ``responsePosition`` (default
    0) from ``opts`` and stores the XML form of the parsed clause in
    ``opts['xQuery']``.  The response position decides the scan direction:
    0 scans strictly after the term, 1 from the term onwards, maximumTerms
    up to and including the term, maximumTerms + 1 strictly before it, and
    any value in between merges a backward scan with a forward scan.

    ``session`` is not a parameter; it is resolved from the enclosing scope.

    Raises:
        Diagnostic 7 if ``scanClause`` is absent; diagnostic 120 if the
        response position lies outside ``0 .. maximumTerms + 1``.

    Returns:
        ``result`` after the terms element has been appended.
    """
    db = session.config.parent
    session.database = db.id
    if 'scanClause' in opts:
        q = cqlParser.parse(opts['scanClause'])
        opts['xQuery'] = etree.XML(q.toXCQL())
    else:
        raise self.diagnostic(7,
                              msg="Mandatory parameter not supplied",
                              details='scanClause')

    mt = opts.get('maximumTerms', 20)
    rp = opts.get('responsePosition', 0)
    if not 0 <= rp <= mt + 1:
        raise self.diagnostic(120,
                              msg="Response position out of range",
                              details=str(rp))

    if not q.term.value:
        # An empty term scans from the lowest possible character.
        q.term.value = chr(0)
    q.config = session.config

    if rp == 1:
        data = db.scan(session, q, mt, direction=">=")
    elif rp == 0:
        data = db.scan(session, q, mt, direction=">")
    elif rp == mt:
        data = db.scan(session, q, mt, direction="<=")
        data.reverse()
    elif rp == mt + 1:
        data = db.scan(session, q, mt, direction="<")
        data.reverse()
    else:
        # Need to go up and down
        forward = db.scan(session, q, mt - rp + 1, direction=">=")
        data = db.scan(session, q, rp, direction="<=")
        # Drop the scan term from the backward half when both halves start
        # with it.  Either half may be empty (term beyond the ends of the
        # index), which used to raise IndexError here.
        if forward and data and forward[0][0] == data[0][0]:
            data = data[1:]
        data.reverse()
        data.extend(forward)

    terms = elemFac.terms()
    for d in data:
        t = self.term(value=d[0], num=d[1][1])
        self.extraData('term', opts, t, d)
        terms.append(t)
    result.append(terms)
    return result
def parse(self, session, data, codec, db):
    """Assemble a CQL query from numbered form fields and parse it.

    For every ``fieldcontN`` present in the form one parenthesised clause
    is produced from ``fieldcontN`` (search text), ``fieldidxN`` (one or
    more index names separated by ``||``) and ``fieldrelN`` (relation).
    ``fieldboolN`` supplies the boolean between clause N and clause N+1.

    Quoted phrases matched by ``self.phraseRe`` are turned into their own
    ``=/relevant/proxinfo`` clauses, except for exact / ``=`` / ``/string``
    relations where double quotes are escaped instead.

    Args:
        session: unused here.
        data: form object supporting ``in`` and ``getfirst(name, default)``.
        codec: unused here.
        db: unused here.

    Returns:
        The result of ``cql.parse`` on the assembled query string.
    """
    form = data

    # bools[N] is the operator that follows clause N; bools[0] is only a
    # placeholder (read from 'fieldbool0').
    bools = []
    i = 1
    while 'fieldcont{0}'.format(i) in form:
        boolean = form.getfirst('fieldbool{0}'.format(i - 1),
                                'and/relevant/proxinfo')
        bools.append(boolean)
        i += 1

    qClauses = []
    i = 1
    while 'fieldcont{0}'.format(i) in form:
        cont = form.getfirst('fieldcont{0}'.format(i))
        idxs = unquote(
            form.getfirst('fieldidx{0}'.format(i), 'cql.anywhere'))
        rel = unquote(
            form.getfirst('fieldrel{0}'.format(i), 'all/relevant/proxinfo'))

        # Don't allow phrase searching for exact or /string searches;
        # escape quotes so they are matched literally.
        if rel.startswith(('exact', '=')) or '/string' in rel:
            cont = cont.replace('"', '\\"')

        # Pull phrases out once per field.  When this was done inside the
        # per-index loop the first index consumed the phrases and every
        # later index in a '||' list lost them.
        phrase_search = not ('exact' in rel or
                             '=' in rel or
                             '/string' in rel)
        if phrase_search:
            phrases = self.phraseRe.findall(cont)
            cont = self.phraseRe.sub('', cont)
        else:
            phrases = []
        # Ignore pure whitespace left behind by phrase removal.
        remainder = cont.strip()

        if rel.startswith('all'):
            subBool = ' and/relevant/proxinfo '
        else:
            subBool = ' or/relevant/proxinfo '

        idxClauses = []
        for idx in idxs.split('||'):
            subClauses = []
            for ph in phrases:
                subClauses.append(
                    '({0} =/relevant/proxinfo {1})'.format(idx, ph))
            if idx and rel and remainder:
                subClauses.append(
                    '{0} {1} {2}'.format(idx, rel, remainder))
            if subClauses:
                idxClauses.append('({0})'.format(subBool.join(subClauses)))

        qClauses.append('({0})'.format(
            ' or/rel.combine=sum/proxinfo '.join(idxClauses)))

        # Only add a boolean when another clause follows.
        if i >= len(bools):
            break
        qClauses.append(bools[i])
        i += 1

    qString = ' '.join(qClauses)
    formcodec = form.getfirst('_charset_', 'utf-8')
    return cql.parse(qString.decode(formcodec).encode('utf-8'))
def parse(self, session, data, codec, db):
    """Parse ``data`` as CQL and return the resulting query object.

    ``session``, ``codec`` and ``db`` are accepted but not used.
    """
    # XXX check codec, turn into unicode first
    parsed_query = cql.parse(data)
    return parsed_query
def resultSetFacetsHandler(session, val, resp, resultSet=[], db=None):
    """Build a facets element for the indexes named in a CQL query.

    ``val`` is a CQL query.  The booleans used are meaningless: facets are
    returned for each search clause.  The term in each clause is also
    meaningless and need be nothing more than ``*``.

    The result looks something like a browse response, e.g.::

        <facets>
          <facetsByIndex index="dc.subject" relation="exact">
            <terms>
              <term>
                <value>Genetics</value>
                <numberOfRecords>2</numberOfRecords>
              </term>
              ...
            </terms>
          </facetsByIndex>
          ...
        </facets>

    Args:
        session: passed through to the database / index calls.
        val: CQL query string naming the indexes to facet on.
        resp: unused here.
        resultSet: result set to compute facets for (never modified here).
        db: database used to resolve indexes.

    Returns:
        ``None`` when ``resultSet`` is empty or ``db`` is ``None``;
        otherwise the facets element.  If ``val`` fails to parse, the
        element contains the diagnostic instead of facets.
    """
    # Quick escapes
    if not len(resultSet) or db is None:
        return

    myNamespaces = namespaces.copy()
    myNamespaces['fct'] = "info:srw/extension/2/facets-1.0"
    pm = db.get_path(session, 'protocolMap')
    if not pm:
        db._cacheProtocolMaps(session)
        pm = db.protocolMaps.get('http://www.loc.gov/zing/srw/')
        # Cache on the database object.  This used to assign to
        # ``self.paths`` although no ``self`` exists in this function.
        db.paths['protocolMap'] = pm
    fctElemFac = ElementMaker(namespace=myNamespaces['fct'],
                              nsmap=myNamespaces)

    def getFacets(query):
        """Return a list of facetsByIndex elements, one per search clause."""
        if not isinstance(query, cql.SearchClause):
            # Boolean node: collect facets from both operands.
            fctEls = getFacets(query.leftOperand)
            fctEls.extend(getFacets(query.rightOperand))
            return fctEls

        fctEl = fctElemFac.facetsByIndex({
            'index': query.index.toCQL(),
            'relation': query.relation.toCQL()
        })
        idx = pm.resolveIndex(session, query)
        if idx is None:
            fctEl.append(
                diagnosticToXML(
                    cql.Diagnostic(code=16,
                                   message="Unsupported Index",
                                   details=query.index.toCQL())))
            # Return a list like every other path; callers extend() or
            # iterate the result, and iterating a bare element would walk
            # its children instead.
            return [fctEl]
        try:
            facets = idx.facets(session, resultSet)
        except Exception:
            # Index doesn't support facets
            # TODO: diagnostic?
            facets = []
        termsEl = sruElemFac.terms()
        for f in facets:
            termsEl.append(
                sruElemFac.term(sruElemFac.value(f[0]),
                                sruElemFac.numberOfRecords(str(f[1][1]))))
        fctEl.append(termsEl)
        return [fctEl]

    fctsEl = fctElemFac.facets()
    try:
        query = cql.parse(val)
    except cql.Diagnostic as d:
        fctsEl.append(diagnosticToXML(d))
        return fctsEl
    for el in getFacets(query):
        fctsEl.append(el)
    return fctsEl
def handleScan(self, session, data):
    """Handle a Z39.50 scan request and build the ScanResponse.

    Exactly one database may be scanned.  ``preferredPositionInResponse``
    (rp) relative to ``numberOfTermsRequested`` (nt) decides the scan
    direction: 0 scans after the term, 1 from the term, nt up to the term,
    nt + 1 before the term, anything in between merges a backward and a
    forward scan.  Every ``stepSize + 1``-th term is returned.

    Any exception raised while processing is converted into a
    non-surrogate diagnostic (condition 123) and ``scanStatus`` 6.

    Returns:
        The encoded response.
    """
    step = getattr(data, 'stepSize', 0)
    resp = ScanResponse()
    resp.stepSize = step
    resp.scanStatus = 1
    resp.numberOfEntriesReturned = 0
    resp.positionOfTerm = 0
    try:
        dbs = data.databaseNames
        if len(dbs) != 1:
            # Can only scan one db at once? (XXX)
            raise ValueError
        nt = data.numberOfTermsRequested
        rp = data.preferredPositionInResponse
        if rp < 0 or rp > (nt + 1):
            # Busted numbers (XXX)
            raise ValueError

        dbname = dbs[0]
        cfg = self.session.configs.get(dbname, None)
        db = cfg.parent
        session.database = db.id

        where = data.termListAndStartPoint
        # Make it look like part of an RPN query...
        w = ('op', ('attrTerm', where))
        clause = CQLUtils.rpn2cql(w, cfg)
        if not clause.term.value:
            clause.term.value = 'a'
        nstms = nt * (step + 1)
        clause = cqlParser.parse(clause.toCQL())

        # The comparisons below used an undefined name ``mt``; the
        # requested term count in this method is ``nt``.
        if rp == 1:
            found = db.scan(session, clause, nstms, direction=">=")
        elif rp == 0:
            found = db.scan(session, clause, nstms, direction=">")
        elif rp == nt:
            found = db.scan(session, clause, nstms, direction="<=")
            found.reverse()
        elif rp == nt + 1:
            found = db.scan(session, clause, nstms, direction="<")
            found.reverse()
        else:
            # Need to go up and down
            forward = db.scan(session, clause, nt - rp + 1, direction=">=")
            found = db.scan(session, clause, rp, direction="<=")
            # Drop the duplicated scan term; either half may be empty.
            if forward and found and forward[0][0] == found[0][0]:
                found = found[1:]
            found.reverse()
            found.extend(forward)

        terms = []
        for d in found[::step + 1]:
            t = TermInfo()
            t.term = ('general', d[0])
            t.globalOccurrences = d[1][1]
            terms.append(('termInfo', t))
        resp.positionOfTerm = rp
        resp.numberOfEntriesReturned = len(terms)
        resp.scanStatus = 0
        l = ListEntries()
        l.entries = terms
        resp.entries = l
    except Exception as err:
        l = ListEntries()
        d = self.generate_diagnostic(err)
        d.condition = 123
        diag = [('defaultFormat', d)]
        l.nonsurrogateDiagnostics = diag
        resp.entries = l
        resp.numberOfEntriesReturned = 0
        resp.scanStatus = 6
    # NOTE(review): the method previously fell off the end and returned
    # None, discarding ``resp``.  The 'scanResponse' tag is assumed by
    # analogy with handleSearch's ('searchResponse', resp) -- confirm
    # against the APDU definitions.
    return self.encode(('scanResponse', resp))
def handleSearch(self, session, data):
    """Handle a Z39.50 search request and build the SearchResponse.

    The query is converted to CQL (directly for type 0 / type 104 CQL
    queries, via RPN for type 1 / 101 / 2 queries), run against every
    named database, and the result sets are combined.  The combined set is
    stored under the requested result set name and kept in a small
    per-session cache.

    A response must be returned no matter what: any exception is turned
    into a non-surrogate diagnostic on the response.

    Returns:
        The encoded response.
    """
    resp = SearchResponse()
    resp.resultCount = 0
    resp.numberOfRecordsReturned = 0
    resp.nextResultSetPosition = 1
    resp.searchStatus = 1
    resp.resultSetStatus = 1
    resp.presentStatus = PresentStatus.get_num_from_name('failure')
    try:
        queryType = data.query[0]
        query = ["", ""]
        if queryType in ['type_1', 'type_101']:
            query = ['rpn', data.query[1].rpn]
        elif queryType == 'type_0':
            # A Priori external. We assume CQL
            query = ['cql', data.query[1]]
        elif queryType == 'type_2':
            # ISO8777 (CCL)
            query = ['rpn', ccl.mk_rpn_query(data.query[1])]
        elif queryType == 'type_104':
            # Look for CQL or SQL
            type104 = data.query[1].direct_reference
            if type104 == Z3950_QUERY_CQL_ov:
                query = ['cql', data.query[1].encoding[1]]
            else:
                # SQL (XXX implement direct to postgres) or an undefined
                # query type.
                raise NotImplementedError
        elif queryType in ['type_102', 'type_100']:
            # 102: Ranked List, not yet /defined/ let alone implemented
            # 100: Z39.58 query (Standard was withdrawn)
            raise NotImplementedError

        rsetname = data.resultSetName
        dbs = data.databaseNames
        resultSets = []
        if query[0] == 'cql':
            q = CQLParser.parse(query[1])
        for dbname in dbs:
            cfg = self.session.configs.get(dbname, None)
            if cfg is None:
                raise ValueError("%s not in %r" %
                                 (dbname, self.session.configs.keys()))
            db = cfg.parent
            if query[0] == 'rpn':
                self.log("Trying to convert: %s" % (repr(query[1])))
                q = CQLUtils.rpn2cql(query[1], cfg)
                self.log("--> " + q.toCQL())
            session.database = db.id
            q = cqlParser.parse(q.toCQL())
            resultSets.append(db.search(session, q))

        if not resultSets:
            # No resultset.
            return self.encode(('searchResponse', resp))
        rs = resultSets[0]
        for r in resultSets[1:]:
            rs.combine(r)
        resp.resultCount = len(rs)

        # Maybe put it into our DB
        if rsetname in session.resultSets:
            rsid = session.resultSets[rsetname]
            rs.id = rsid
            session.resultSetStore.store_resultSet(session, rs)
        else:
            rsid = session.resultSetStore.create_resultSet(session, rs)
            session.resultSets[rsetname] = rsid

        # Only keep 4 at once
        keys = list(session.resultSetCache.keys())
        if len(keys) > 3:
            # Delete one at random
            r = rand.randint(0, 3)
            del session.resultSetCache[keys[r]]
        session.resultSetCache[rsid] = rs
    except Exception as err:
        # XXX add -correct- diagnostic
        resp.numberOfRecordsReturned = 1
        resp.nextResultSetPosition = 0
        resp.resultSetStatus = 3
        d = self.generate_diagnostic(err)
        diag = ('nonSurrogateDiagnostic', d)
        resp.records = diag
    # Previously only the "no result set" path returned the response;
    # success and error paths fell off the end and returned None.
    return self.encode(('searchResponse', resp))
def process_scan(self, session, req):
    """Process a scan request and populate ``self.terms``.

    The scan clause is parsed into a search clause and its index, relation
    and term are echoed back on ``req.xScanClause``.  If the request
    already carries diagnostics they are copied to ``self.diagnostics``
    and processing stops.

    ``responsePosition`` (rp) relative to ``maximumTerms`` (mt) selects the
    scan direction: 0 after the term, 1 from the term, mt up to the term,
    mt + 1 before the term, otherwise a backward and a forward scan are
    merged.

    Raises:
        Diagnostic7: ``version`` or ``scanClause`` is missing.
        Diagnostic120: ``responsePosition`` is out of range.
    """
    config = req.config
    db = config.parent
    session.database = db.id
    self.terms = []

    if not req.version:
        diag = Diagnostic7()
        diag.message = "Mandatory 'version' parameter not supplied"
        diag.details = 'version'
        raise diag

    if not req.scanClause:
        # Seriously broken request.
        f = Diagnostic7()
        f.message = 'Request must include a query'
        f.details = 'scanClause'
        raise f
    # Convert clause into SearchClause object
    clause = CQLParser.parse(req.scanClause)
    # Stupid schema.
    req.xScanClause = "".join([
        clause.index.toXCQL(),
        clause.relation.toXCQL(),
        clause.term.toXCQL(),
    ])

    self.echoedScanRequest = req
    if req.diagnostics:
        self.diagnostics = req.diagnostics
        return

    mt = req.get('maximumTerms')
    rp = req.get('responsePosition')
    if rp < 0 or rp > (mt + 1):
        f = Diagnostic120()
        f.message = "Response position out of range"
        f.details = str(rp)
        raise f

    if not clause.term.value:
        # Empty term: scan from the lowest possible character.
        clause.term.value = chr(0)
    clause.config = config

    if rp == 1:
        data = db.scan(session, clause, mt, direction=">=")
    elif rp == 0:
        data = db.scan(session, clause, mt, direction=">")
    elif rp == mt:
        data = db.scan(session, clause, mt, direction="<=")
        data.reverse()
    elif rp == mt + 1:
        data = db.scan(session, clause, mt, direction="<")
        data.reverse()
    else:
        # Need to go up and down
        data1 = db.scan(session, clause, mt - rp + 1, direction=">=")
        data = db.scan(session, clause, rp, direction="<=")
        # Remove the duplicated scan term.  Either scan can be empty,
        # which previously raised IndexError on the [0][0] lookups.
        if data1 and data and data1[0][0] == data[0][0]:
            data = data[1:]
        data.reverse()
        data.extend(data1)

    for d in data:
        t = SRW.types.ScanTerm('ScanTerm')
        t.value = d[0]
        t.numberOfRecords = d[1][1]
        process_extraData(config.termExtensionHash, req, t, d)
        self.terms.append(t)
    process_extraData(config.scanExtensionHash, req, self)
    process_extraData(config.responseExtensionHash, req, self)
def parse(self, session, data, codec, db):
    """Turn submitted form data into a parsed CQL query.

    The form is converted to a CQL string by ``generate_cqlQuery`` and the
    string is handed to ``cql.parse``.  ``session``, ``codec`` and ``db``
    are accepted but not used.
    """
    cql_string = generate_cqlQuery(data)
    return cql.parse(cql_string)
def resultSetFacetsHandler(session, val, resp, resultSet=[], db=None):
    """Put facets for the requested indexes into extraSearchRetrieveData.

    ``val`` is a CQL query.  The booleans used are meaningless: facets are
    returned for each clause.  The term in each clause is also meaningless
    and need be nothing more than ``*``.

    The result looks something like a browse response, e.g.::

        <facets>
          <facetsByIndex index="dc.subject" relation="exact">
            <terms>
              <term>
                <value>Genetics</value>
                <numberOfRecords>2</numberOfRecords>
              </term>
              ...
            </terms>
          </facetsByIndex>
          ...
        </facets>

    Args:
        session: passed through to the database / index calls.
        val: CQL query string naming the indexes to facet on.
        resp: unused here.
        resultSet: result set to compute facets for (never modified here).
        db: database used to resolve indexes.

    Returns:
        ``None`` when ``resultSet`` is empty or ``db`` is ``None``;
        otherwise the facets element (holding a diagnostic if ``val``
        cannot be parsed).
    """
    # Quick escapes
    if not len(resultSet) or db is None:
        return

    myNamespaces = namespaces.copy()
    myNamespaces['fct'] = "info:srw/extension/2/facets-1.0"
    pm = db.get_path(session, 'protocolMap')
    if not pm:
        db._cacheProtocolMaps(session)
        pm = db.protocolMaps.get('http://www.loc.gov/zing/srw/')
        # There is no ``self`` in this module-level function; the cache
        # belongs on the database object the map was read from.
        db.paths['protocolMap'] = pm
    fctElemFac = ElementMaker(namespace=myNamespaces['fct'],
                              nsmap=myNamespaces)

    def getFacets(query):
        """Return a list of facetsByIndex elements for ``query``."""
        if isinstance(query, cql.SearchClause):
            fctEl = fctElemFac.facetsByIndex({
                'index': query.index.toCQL(),
                'relation': query.relation.toCQL(),
            })
            idx = pm.resolveIndex(session, query)
            if idx is None:
                fctEl.append(diagnosticToXML(
                    cql.Diagnostic(code=16,
                                   message="Unsupported Index",
                                   details=query.index.toCQL())))
                # Always hand back a list: the callers extend() with, or
                # iterate over, the return value.
                return [fctEl]
            try:
                facets = idx.facets(session, resultSet)
            except Exception:
                # Index doesn't support facets
                # TODO: diagnostic?
                facets = []
            termsEl = sruElemFac.terms()
            for f in facets:
                termsEl.append(sruElemFac.term(
                    sruElemFac.value(f[0]),
                    sruElemFac.numberOfRecords(str(f[1][1]))
                ))
            fctEl.append(termsEl)
            return [fctEl]
        # Boolean node: gather facets from both sides.
        fctEls = getFacets(query.leftOperand)
        fctEls.extend(getFacets(query.rightOperand))
        return fctEls

    fctsEl = fctElemFac.facets()
    try:
        query = cql.parse(val)
    except cql.Diagnostic as d:
        fctsEl.append(diagnosticToXML(d))
        return fctsEl
    for el in getFacets(query):
        fctsEl.append(el)
    return fctsEl
def process_searchRetrieve(self, session, req):
    """Process a searchRetrieve request and populate this response.

    The query is parsed and echoed back on the request, the record schema
    and record packing are validated, the search is run and the requested
    window of records is fetched (optionally transformed) into
    ``self.records``.  ``self.numberOfRecords``, ``self.nextRecordPosition``
    and the result set id / idle time are set where applicable.

    If the request already carries diagnostics after parsing its sort
    keys, they are copied to ``self.diagnostics`` and processing stops.

    Raises:
        Diagnostic7: ``version`` or ``query`` is missing.
        Diagnostic66: the record schema is not recognised.
        Diagnostic71: the record packing is not ``string`` or ``xml``.
    """
    if not req.version:
        diag = Diagnostic7()
        diag.message = "Mandatory 'version' parameter not supplied"
        diag.details = 'version'
        raise diag

    # Get our config based on URL
    config = req.config
    db = config.parent
    session.database = db.id
    rss = db.get_object(session, 'defaultResultSetStore')

    # Setup for processing
    if req.query != "":
        req.queryStructure = CQLParser.parse(req.query)
    else:
        # No Query, Request is seriously Broken
        f = Diagnostic7()
        f.message = 'Request must include a query'
        f.details = 'query'
        raise f
    req.queryStructure.config = config
    req.xQuery = req.queryStructure.toXCQL()
    self.echoedSearchRetrieveRequest = req
    req.parseSortKeys()

    if req.diagnostics:
        self.diagnostics = req.diagnostics
        return

    # Check if we recognise the record Schema
    schema = req.get('recordSchema')
    # Redirect to full value
    if schema in config.recordNamespaces:
        schema = config.recordNamespaces[schema]
    if schema not in config.recordNamespaces.values():
        diag = Diagnostic66()
        diag.details = schema
        raise diag
    txr = config.transformerHash.get(schema, None)

    recordPacking = req.get('recordPacking')
    if recordPacking not in ["string", "xml"]:
        diag = Diagnostic71()
        diag.details = req.recordPacking
        raise diag

    # Fencepost.  SRW starts at 1, C3 starts at 0
    startRecord = req.get('startRecord') - 1
    maximumRecords = req.get('maximumRecords')
    ttl = req.get('resultSetTTL')
    rsn = req.queryStructure.getResultSetId()

    rs = db.search(session, req.queryStructure)
    recs = []
    if rs is not None:
        self.numberOfRecords = len(rs)
        if ttl and not rsn:
            rs.expires = ttl
            rsn = rss.create_resultSet(session, rs)

        self.records = []
        end = min(startRecord + maximumRecords, len(rs))
        for rIdx in range(startRecord, end):
            rsi = rs[rIdx]
            r = rsi.fetch_record(session)
            ro = SRW.types.Record('record')
            ro.recordPacking = recordPacking
            ro.recordSchema = schema
            if txr is not None:
                doc = txr.process_record(session, r)
                rec = doc.get_raw(session)
                rec = xmlver.sub("", rec)
            else:
                rec = r.get_xml(session)
            if recordPacking == "string":
                ro.recordData = escape(rec)
            else:
                ro.recordData = rec
            process_extraData(config.recordExtensionHash, req, ro, r)
            recs.append(ro)
        self.records = recs

        # Back to SRU 1-based recordPosition.  ``end`` records have been
        # consumed, so the next one is end + 1 and it exists whenever
        # end + 1 <= total.  The previous strict '<' dropped
        # nextRecordPosition when exactly one record remained.
        nrp = end + 1
        if 0 < nrp <= self.numberOfRecords:
            self.nextRecordPosition = nrp
        if rsn:
            self.resultSetId = rsn
            self.resultSetIdleTime = ttl
    else:
        self.numberOfRecords = 0

    # Empty to prevent data from previous requests
    self.extraResponseData = []
    process_extraData(config.searchExtensionHash, req, self, rs)
    process_extraData(config.responseExtensionHash, req, self)
def process_searchRetrieve(self, opts, result):
    """Process a searchRetrieve request and append the response elements.

    ``opts['query']`` is parsed and echoed back as ``opts['xQuery']``; the
    record schema and packing are validated; the search is run and the
    window of records selected by ``startRecord`` / ``maximumRecords`` is
    appended to ``result`` together with the record count, the result set
    id / idle time (when a result set was created or named) and the next
    record position (when further records remain).  A record that has been
    deleted is reported as a diagnostic record (code 65) in its place.

    Raises:
        Diagnostic 7 (missing query), 66 (unknown schema), 71 (unsupported
        record packing), 10 (query syntax error), 16 (unsupported index)
        or 24 (unsupported relation/term combination).

    Returns:
        ``result`` with the response elements appended.
    """
    session = self.session
    if 'query' not in opts:
        raise self.diagnostic(7,
                              msg="Mandatory parameter not supplied",
                              details='query')
    q = cqlParser.parse(opts['query'])
    q.config = session.config
    opts['xQuery'] = etree.XML(q.toXCQL())

    db = session.config.parent
    session.database = db.id
    rss = db.get_object(session, 'defaultResultSetStore')

    recordMap.update(session.config.recordNamespaces)
    schema = opts.get('recordSchema', '')
    if not schema and hasattr(session.config, 'defaultRetrieveSchema'):
        schema = session.config.defaultRetrieveSchema
    if schema in recordMap:
        schema = recordMap[schema]
    if schema and schema not in session.config.recordNamespaces.values():
        raise self.diagnostic(66,
                              msg="Unknown schema for retrieval",
                              details=schema)
    txr = session.config.transformerHash.get(schema, None)

    recordPacking = opts.get('recordPacking', 'xml')
    if recordPacking not in ["string", "xml"]:
        raise self.diagnostic(71,
                              msg="Unsupported record packing",
                              details=recordPacking)

    # Fencepost.  SRW starts at 1, C3 starts at 0
    startRecord = opts.get('startRecord', 1) - 1
    maximumRecords = opts.get('maximumRecords', -1)
    if maximumRecords < 0:
        if hasattr(session.config, 'defaultNumberOfRecords'):
            maximumRecords = session.config.defaultNumberOfRecords
        else:
            maximumRecords = 1
    ttl = opts.get('resultSetTTL', 0)

    try:
        rsn = q.getResultSetId()
    except c3errors.ConfigFileException as e:
        d = self.diagnostic(10, msg='Query syntax error.')
        if e.reason == "Zeerex does not have default context set.":
            d.message = ('Query syntax error. Database has no default '
                         'context set for indexes. You must supply a '
                         'context set for each index.')
        raise d

    try:
        rs = db.search(session, q)
    except c3errors.ObjectDoesNotExistException as e:
        raise self.diagnostic(16,
                              msg='Unsupported index',
                              details=e.reason)
    except c3errors.QueryException as e:
        raise self.diagnostic(24,
                              msg='Unsupported combination of relation '
                                  'and term',
                              details=e.reason)

    session.currentResultSet = rs
    result.append(elemFac.numberOfRecords(str(len(rs))))
    if len(rs):
        recs = elemFac.records()
        if ttl and not rsn:
            rs.expires = ttl
            rsn = rss.create_resultSet(session, rs)

        end = min(startRecord + maximumRecords, len(rs))
        for rIdx in range(startRecord, end):
            rsi = rs[rIdx]
            try:
                r = rsi.fetch_record(session)
            except c3errors.ObjectDeletedException:
                diag = self.diagnostic(65, "Record deleted.", rsi.id)
                rec = self.record(
                    schema='info:srw/schema/1/diagnostics-v1.1',
                    data=self.diagnosticToXml(diag),
                    identifier=str(rsi),
                    position=rIdx + 1)
            else:
                if txr is not None:
                    doc = txr.process_record(session, r)
                    xml = doc.get_raw(session)
                else:
                    xml = r.get_xml(session)
                xml = xmlVerRe.sub("", xml)
                # Fencepost.  SRW starts at 1, C3 starts at 0
                rec = self.record(schema=schema,
                                  packing=recordPacking,
                                  data=xml,
                                  identifier=str(rsi),
                                  position=rIdx + 1)
                self.extraData('record', opts, rec, rsi, r)
            recs.append(rec)

        if rsn:
            result.append(elemFac.resultSetId(rsn))
            result.append(elemFac.resultSetIdleTime(str(ttl)))
        result.append(recs)

        # ``end`` records are consumed, so the next 1-based position is
        # end + 1; it is valid whenever it does not exceed the total.  The
        # previous strict '<' omitted it when one record remained.
        nrp = end + 1
        if 0 < nrp <= len(rs):
            result.append(elemFac.nextRecordPosition(str(nrp)))

    self.extraData('searchRetrieve', opts, result, rs, db)
    return result
def handleScan(self, session, data):
    """Handle a Z39.50 scan request and build the ScanResponse.

    Only a single database can be scanned.  The preferred position of the
    scan term (rp) relative to the number of terms requested (nt) selects
    the direction: 0 -> after the term, 1 -> from the term, nt -> up to
    the term, nt + 1 -> before the term; any other value merges a backward
    and a forward scan.  Every ``stepSize + 1``-th term is reported.

    Any exception during processing is converted into a non-surrogate
    diagnostic (condition 123) with ``scanStatus`` 6.

    Returns:
        The encoded response.
    """
    if hasattr(data, 'stepSize'):
        step = data.stepSize
    else:
        step = 0
    resp = ScanResponse()
    resp.stepSize = step
    resp.scanStatus = 1
    resp.numberOfEntriesReturned = 0
    resp.positionOfTerm = 0
    try:
        dbs = data.databaseNames
        if len(dbs) != 1:
            # Can only scan one db at once? (XXX)
            raise ValueError
        nt = data.numberOfTermsRequested
        rp = data.preferredPositionInResponse
        if not 0 <= rp <= nt + 1:
            # Busted numbers (XXX)
            raise ValueError

        cfg = self.session.configs.get(dbs[0], None)
        db = cfg.parent
        session.database = db.id

        # Make it look like part of an RPN query...
        w = ('op', ('attrTerm', data.termListAndStartPoint))
        clause = CQLUtils.rpn2cql(w, cfg)
        if not clause.term.value:
            clause.term.value = 'a'
        nstms = nt * (step + 1)
        clause = cqlParser.parse(clause.toCQL())

        # ``nt`` is the requested term count; these branches previously
        # compared against an undefined ``mt`` and raised NameError for
        # every rp other than 0 or 1.
        if rp == 1:
            scanned = db.scan(session, clause, nstms, direction=">=")
        elif rp == 0:
            scanned = db.scan(session, clause, nstms, direction=">")
        elif rp == nt:
            scanned = db.scan(session, clause, nstms, direction="<=")
            scanned.reverse()
        elif rp == nt + 1:
            scanned = db.scan(session, clause, nstms, direction="<")
            scanned.reverse()
        else:
            # Need to go up and down
            upward = db.scan(session, clause, nt - rp + 1, direction=">=")
            scanned = db.scan(session, clause, rp, direction="<=")
            # Remove the duplicated scan term; tolerate empty halves.
            if upward and scanned and upward[0][0] == scanned[0][0]:
                scanned = scanned[1:]
            scanned.reverse()
            scanned.extend(upward)

        terms = []
        for d in scanned[::step + 1]:
            t = TermInfo()
            t.term = ('general', d[0])
            t.globalOccurrences = d[1][1]
            terms.append(('termInfo', t))
        resp.positionOfTerm = rp
        resp.numberOfEntriesReturned = len(terms)
        resp.scanStatus = 0
        l = ListEntries()
        l.entries = terms
        resp.entries = l
    except Exception as err:
        l = ListEntries()
        d = self.generate_diagnostic(err)
        d.condition = 123
        l.nonsurrogateDiagnostics = [('defaultFormat', d)]
        resp.entries = l
        resp.numberOfEntriesReturned = 0
        resp.scanStatus = 6
    # NOTE(review): the response was previously built and then dropped
    # (implicit ``return None``).  The 'scanResponse' tag is assumed by
    # analogy with handleSearch's ('searchResponse', resp) -- confirm
    # against the APDU definitions.
    return self.encode(('scanResponse', resp))
def handleSearch(self, session, data):
    """Handle a Z39.50 search request and build the SearchResponse.

    Supported query types: 1 / 101 (RPN), 0 (assumed CQL), 2 (CCL,
    converted to RPN) and 104 carrying CQL.  RPN queries are converted to
    CQL per database.  Each named database is searched and the result sets
    are combined into the first one, which is then stored under the
    requested result set name and placed in the session's cache (one
    random entry is evicted once the cache holds more than three).

    Must return a response no matter what: any exception is reported as a
    non-surrogate diagnostic on the response.

    Returns:
        The encoded response.
    """
    resp = SearchResponse()
    resp.resultCount = 0
    resp.numberOfRecordsReturned = 0
    resp.nextResultSetPosition = 1
    resp.searchStatus = 1
    resp.resultSetStatus = 1
    resp.presentStatus = PresentStatus.get_num_from_name('failure')
    try:
        queryType = data.query[0]
        query = ["", ""]
        if queryType in ['type_1', 'type_101']:
            zQuery = data.query[1]
            query = ['rpn', zQuery.rpn]
        elif queryType == 'type_0':
            # A Priori external. We assume CQL
            query = ['cql', data.query[1]]
        elif queryType == 'type_2':
            # ISO8777 (CCL)
            rpn = ccl.mk_rpn_query(data.query[1])
            query = ['rpn', rpn]
        elif queryType == 'type_104':
            # Look for CQL or SQL
            type104 = data.query[1].direct_reference
            if type104 == Z3950_QUERY_CQL_ov:
                query = ['cql', data.query[1].encoding[1]]
            elif type104 == Z3950_QUERY_SQL_ov:
                # XXX Implement direct to postgres
                raise NotImplementedError
            else:
                # Undefined query type
                raise NotImplementedError
        elif queryType in ['type_102', 'type_100']:
            # 102: Ranked List, not yet /defined/ let alone implemented
            # 100: Z39.58 query (Standard was withdrawn)
            raise NotImplementedError

        rsetname = data.resultSetName
        dbs = data.databaseNames
        resultSets = []
        if query[0] == 'cql':
            q = CQLParser.parse(query[1])
        for dbname in dbs:
            cfg = self.session.configs.get(dbname, None)
            if cfg is not None:
                db = cfg.parent
                if query[0] == 'rpn':
                    self.log("Trying to convert: %s" % (repr(query[1])))
                    q = CQLUtils.rpn2cql(query[1], cfg)
                    self.log("--> " + q.toCQL())
                session.database = db.id
                q = cqlParser.parse(q.toCQL())
                resultSets.append(db.search(session, q))
            else:
                raise ValueError("%s not in %r" %
                                 (dbname, self.session.configs.keys()))

        if len(resultSets) > 1:
            rs = resultSets[0]
            for r in resultSets[1:]:
                rs.combine(r)
        elif len(resultSets) == 1:
            rs = resultSets[0]
        else:
            # No resultset.
            return self.encode(('searchResponse', resp))
        resp.resultCount = len(rs)

        # Maybe put it into our DB
        if rsetname in session.resultSets:
            rsid = session.resultSets[rsetname]
            rs.id = rsid
            session.resultSetStore.store_resultSet(session, rs)
        else:
            rsid = session.resultSetStore.create_resultSet(session, rs)
            session.resultSets[rsetname] = rsid

        # Only keep 4 at once
        cached_ids = list(session.resultSetCache.keys())
        if len(cached_ids) > 3:
            # Delete one at random
            victim = rand.randint(0, 3)
            del session.resultSetCache[cached_ids[victim]]
        session.resultSetCache[rsid] = rs
    except Exception as err:
        # XXX add -correct- diagnostic
        resp.numberOfRecordsReturned = 1
        resp.nextResultSetPosition = 0
        resp.resultSetStatus = 3
        d = self.generate_diagnostic(err)
        resp.records = ('nonSurrogateDiagnostic', d)
    # Return the response on the success and error paths too; previously
    # only the "no result set" branch returned anything.
    return self.encode(('searchResponse', resp))
def process_searchRetrieve(self, opts, result):
    """Process a searchRetrieve request and append the response elements.

    ``opts['query']`` is parsed and echoed back as ``opts['xQuery']``; the
    record schema and packing are validated; the search is run and the
    window of records selected by ``startRecord`` / ``maximumRecords`` is
    appended to ``result`` together with the record count, the result set
    id / idle time (when applicable) and the next record position (when
    further records remain).

    ``session`` is not a parameter; it is resolved from the enclosing scope.

    Raises:
        Diagnostic 7 (missing query), 66 (unknown schema), 71 (unsupported
        record packing), 10 (query syntax error), 16 (unsupported index)
        or 24 (unsupported relation/term combination).

    Returns:
        ``result`` with the response elements appended.
    """
    if 'query' in opts:
        q = cqlParser.parse(opts['query'])
        q.config = session.config
        opts['xQuery'] = etree.XML(q.toXCQL())
    else:
        raise self.diagnostic(7,
                              msg="Mandatory parameter not supplied",
                              details='query')

    db = session.config.parent
    session.database = db.id
    rss = db.get_object(session, 'defaultResultSetStore')

    recordMap.update(session.config.recordNamespaces)
    schema = opts.get('recordSchema', '')
    if not schema and hasattr(session.config, 'defaultRetrieveSchema'):
        schema = session.config.defaultRetrieveSchema
    if schema in recordMap:
        schema = recordMap[schema]
    if schema and schema not in session.config.recordNamespaces.values():
        raise self.diagnostic(66,
                              msg="Unknown schema for retrieval",
                              details=schema)
    txr = session.config.transformerHash.get(schema, None)

    recordPacking = opts.get('recordPacking', 'xml')
    if recordPacking not in ["string", "xml"]:
        raise self.diagnostic(71,
                              msg="Unsupported record packing",
                              details=recordPacking)

    # Fencepost.  SRW starts at 1, C3 starts at 0
    startRecord = opts.get('startRecord', 1) - 1
    maximumRecords = opts.get('maximumRecords', -1)
    if maximumRecords < 0:
        if hasattr(session.config, 'defaultNumberOfRecords'):
            maximumRecords = session.config.defaultNumberOfRecords
        else:
            maximumRecords = 1
    ttl = opts.get('resultSetTTL', 0)

    try:
        rsn = q.getResultSetId()
    except c3errors.ConfigFileException as e:
        d = self.diagnostic(10, msg='Query syntax error.')
        if e.reason == "Zeerex does not have default context set.":
            d.message = ('Query syntax error. Database has no default '
                         'context set for indexes. You must supply a '
                         'context set for each index.')
        raise d

    try:
        rs = db.search(session, q)
    except c3errors.ObjectDoesNotExistException as e:
        raise self.diagnostic(16,
                              msg='Unsupported index',
                              details=e.reason)
    except c3errors.QueryException as e:
        raise self.diagnostic(24,
                              msg='Unsupported combination of relation '
                                  'and term',
                              details=e.reason)

    session.currentResultSet = rs
    result.append(elemFac.numberOfRecords(str(len(rs))))
    if len(rs):
        recs = elemFac.records()
        if ttl and not rsn:
            rs.expires = ttl
            rsn = rss.create_resultSet(session, rs)

        end = min(startRecord + maximumRecords, len(rs))
        for rIdx in range(startRecord, end):
            rsi = rs[rIdx]
            r = rsi.fetch_record(session)
            if txr is not None:
                doc = txr.process_record(session, r)
                xml = doc.get_raw(session)
            else:
                xml = r.get_xml(session)
            xml = xmlVerRe.sub("", xml)
            # Fencepost.  SRW starts at 1, C3 starts at 0
            rec = self.record(schema=schema,
                              packing=recordPacking,
                              data=xml,
                              identifier=str(rsi),
                              position=rIdx + 1)
            self.extraData('record', opts, rec, rsi, r)
            recs.append(rec)

        if rsn:
            result.append(elemFac.resultSetId(rsn))
            result.append(elemFac.resultSetIdleTime(str(ttl)))
        result.append(recs)

        # The next 1-based position after consuming ``end`` records is
        # end + 1, valid while it does not exceed the total.  The previous
        # strict '<' left it out when exactly one record remained.
        nrp = end + 1
        if 0 < nrp <= len(rs):
            result.append(elemFac.nextRecordPosition(str(nrp)))

    self.extraData('searchRetrieve', opts, result, rs, db)
    return result