Ejemplo n.º 1
0
    def _rewriteQuery(self, session, query):
        """Split multi-word search clauses into one clause per word.

        A leaf clause (an object without ``leftOperand``) is rewritten when
        its term holds several space-separated words:

        * relation ``all``: per-word clauses joined with ``and``
        * relation ``any``: per-word clauses joined with ``or``
        * relation ``=`` on a non-numeric term containing a space: joined
          with ``prox``

        Boolean nodes are walked recursively and any rewritten operand is
        assigned back onto the node in place.

        Returns the freshly parsed query for a rewritten leaf clause, or
        None when the clause cannot or need not be rewritten.  Boolean nodes
        always return None because they are updated in place.
        """
        if hasattr(query, 'leftOperand'):
            # Boolean node: rewrite both sides in place.
            left = self._rewriteQuery(session, query.leftOperand)
            if left:
                query.leftOperand = left
            right = self._rewriteQuery(session, query.rightOperand)
            if right:
                query.rightOperand = right
            return None

        relation = query.relation.value
        if relation == "all":
            # rewrite to AND triples
            joiner = " and "
        elif relation == "any":
            joiner = " or "
        elif (relation == "=" and not query.term.value.isnumeric()
              and ' ' in query.term.value):
            # Membership test instead of str.index(): index() raises
            # ValueError (it never returns -1) when there is no space.
            joiner = " prox "
        else:
            # can't rewrite
            return None

        # now split on spaces
        words = query.term.value.split(' ')
        if len(words) == 1:
            return None
        clauses = [
            ' '.join([query.index.toCQL(),
                      query.relation.toCQL(),
                      '"' + word + '"'])
            for word in words
        ]
        return cql.parse(joiner.join(clauses))
Ejemplo n.º 2
0
 def value_of(elem):
     """Deserialize the text of ``elem`` according to its ``t`` attribute.

     Handled type tags:

     * ``pickle``: the unescaped text is UTF-8 encoded and unpickled
     * ``None``: returns None
     * ``object``: the text is an identifier looked up on the session's
       current database object
     * ``cql``: the text is parsed with ``cqlParser``
     * any key of ``dsrlz_typehash``: the mapped callable is applied
     * anything else: the unescaped text is returned unchanged
     """
     kind = elem.attrib['t']
     raw = unescape(elem.text)
     if kind == 'pickle':
         # NOTE: unpickling runs arbitrary code if the data is untrusted.
         return pickle.loads(raw.encode('utf-8'))
     if kind == 'None':
         return None
     if kind == 'object':
         # dereference id
         database = session.server.get_object(session, session.database)
         return database.get_object(session, raw)
     if kind == 'cql':
         # Parse errors propagate to the caller unchanged.
         return cqlParser.parse(raw)
     if kind in dsrlz_typehash:
         convert = dsrlz_typehash[kind]
         if type(raw) == unicode and kind != 'unicode':
             # Hand the converter an encoded byte string, not unicode.
             return convert(raw.encode('utf-8'))
         return convert(raw)
     return raw
Ejemplo n.º 3
0
 def value_of(elem):
     """Return the Python value encoded by ``elem``.

     The element's ``t`` attribute names the stored type and its text
     (after unescaping) carries the serialized value.  Unknown type tags
     yield the unescaped text itself.
     """
     type_tag = elem.attrib['t']
     text = unescape(elem.text)
     result = text  # fallback for unrecognised type tags
     if type_tag == 'pickle':
         # NOTE: pickle.loads must only ever see trusted data.
         result = pickle.loads(text.encode('utf-8'))
     elif type_tag == 'None':
         result = None
     elif type_tag == 'object':
         # The text is an object identifier; resolve it via the database.
         store = session.server.get_object(session, session.database)
         result = store.get_object(session, text)
     elif type_tag == 'cql':
         # Any parser exception is left to propagate.
         result = cqlParser.parse(text)
     elif type_tag in dsrlz_typehash:
         needs_bytes = type(text) == unicode and type_tag != 'unicode'
         argument = text.encode('utf-8') if needs_bytes else text
         result = dsrlz_typehash[type_tag](argument)
     return result
Ejemplo n.º 4
0
 def _rewriteQuery(self, session, query):
     """Rewrite multi-word clauses of ``query`` as boolean combinations.

     For a leaf clause (no ``leftOperand`` attribute) the term is split on
     spaces and one clause per word is generated, joined by ``and`` for the
     ``all`` relation, ``or`` for ``any`` and ``prox`` for ``=`` when the
     term is non-numeric and contains a space.  The joined string is parsed
     with ``cql.parse`` and returned.

     For a boolean node both operands are processed recursively and
     replaced in place when a rewrite was produced.

     Returns the new query object for a rewritten leaf, otherwise None.
     """
     if not hasattr(query, 'leftOperand'):
         rel_value = query.relation.value
         if rel_value == "all":
             # Rewrite to AND triples
             nbool = " and "
         elif rel_value == "any":
             nbool = " or "
         elif (rel_value == "="
               and not query.term.value.isnumeric()
               and ' ' in query.term.value):
             # `in` replaces str.index(' ') > -1: index() raises ValueError
             # for a term without a space instead of returning -1.
             nbool = " prox "
         else:
             # Can't rewrite
             return None
         # Now split on spaces
         terms = query.term.value.split(' ')
         if len(terms) == 1:
             return None
         newstr = nbool.join(
             ' '.join([
                 query.index.toCQL(),
                 query.relation.toCQL(),
                 '"' + t + '"',
             ])
             for t in terms
         )
         return cql.parse(newstr)
     else:
         # Boolean node: operands are swapped in place, nothing is returned.
         for side in ('leftOperand', 'rightOperand'):
             rewritten = self._rewriteQuery(session, getattr(query, side))
             if rewritten:
                 setattr(query, side, rewritten)
         return None
Ejemplo n.º 5
0
    def parse(self, session, data, codec, db):
        """Build a CQL query from numbered form fields and parse it.

        ``data`` is a form-like object supporting ``in`` and ``getfirst()``.
        Clause *i* (numbered from 1) is read from:

        * ``fieldcont<i>``: the search terms (its presence defines the clause)
        * ``fieldidx<i>``: index name(s), ``||``-separated
          (default ``cql.anywhere``)
        * ``fieldrel<i>``: the relation (default ``all/relevant/proxinfo``)
        * ``fieldbool<i>``: boolean joining clause *i* to clause *i+1*
          (default ``and/relevant/proxinfo``)

        Quoted phrases matched by ``self.phraseRe`` become separate
        ``=/relevant/proxinfo`` clauses unless the relation is an exact or
        ``/string`` one, in which case double quotes are escaped instead.

        ``session``, ``codec`` and ``db`` are not used.  The query string is
        decoded using the form's ``_charset_`` field (default ``utf-8``) and
        re-encoded as UTF-8 before being handed to ``cql.parse``, whose
        result is returned.
        """
        form = data
        bools = []
        i = 1
        while "fieldcont{0}".format(i) in form:
            bools.append(form.getfirst("fieldbool{0}".format(i - 1),
                                       "and/relevant/proxinfo"))
            i += 1

        qClauses = []
        i = 1
        while "fieldcont{0}".format(i) in form:
            cont = form.getfirst("fieldcont{0}".format(i))
            idxs = unquote(form.getfirst("fieldidx{0}".format(i),
                                         "cql.anywhere"))
            rel = unquote(form.getfirst("fieldrel{0}".format(i),
                                        "all/relevant/proxinfo"))
            # in case they're trying to do phrase searching
            if (rel.startswith("exact") or rel.startswith("=")
                    or rel.find("/string") != -1):
                # don't allow phrase searching for exact or /string searches
                cont = cont.replace('"', '\\"')

            if rel.startswith("all"):
                subBool = " and/relevant/proxinfo "
            else:
                subBool = " or/relevant/proxinfo "

            # Extract phrases once per clause, before looping over indexes.
            # Doing it inside the loop stripped the phrases from ``cont`` on
            # the first index, so later indexes never got the phrase clauses.
            if "exact" in rel or "=" in rel or "/string" in rel:
                # no phrase searching here; quotes were already escaped
                phrases = []
            else:
                phrases = self.phraseRe.findall(cont)
                cont = self.phraseRe.sub("", cont)

            idxClauses = []
            for idx in idxs.split("||"):
                subClauses = [
                    "({0} =/relevant/proxinfo {1})".format(idx, ph)
                    for ph in phrases
                ]
                if idx and rel and cont:
                    subClauses.append(
                        "{0} {1} {2}".format(idx, rel, cont.strip()))
                if subClauses:
                    idxClauses.append(
                        "({0})".format(subBool.join(subClauses)))

            qClauses.append("({0})".format(
                " or/rel.combine=sum/proxinfo ".join(idxClauses)))
            # if there's another clause and a corresponding boolean
            try:
                qClauses.append(bools[i])
            except IndexError:
                # No boolean left, so this was the last clause.
                break

            i += 1

        qString = " ".join(qClauses)
        formcodec = form.getfirst("_charset_", "utf-8")
        return cql.parse(qString.decode(formcodec).encode("utf-8"))
Ejemplo n.º 6
0
 def fetch_query(self, session, id):
     """Load the stored query ``id`` and return it as a parsed query object.

     The data fetched for ``id`` is parsed with ``cqlParser`` and the
     resulting object is tagged with ``id``.  If data is also stored under
     ``__rset_<id>`` it is recorded on ``self.resultSetId``; when that entry
     does not exist the attribute is left untouched.
     """
     query = cqlParser.parse(self.fetch_data(session, id))
     query.id = id
     try:
         resultSetId = self.fetch_data(session, "__rset_%s" % id)
     except ObjectDoesNotExistException:
         # No result set stored alongside this query.
         return query
     self.resultSetId = resultSetId
     return query
Ejemplo n.º 7
0
 def fetch_query(self, session, id):
     """Fetch the query stored as ``id``, parse it and return the result.

     Also looks up the companion ``__rset_<id>`` entry; when present its
     value is stored on ``self.resultSetId``, and a missing entry is
     ignored.
     """
     raw_query = self.fetch_data(session, id)
     parsed = cqlParser.parse(raw_query)
     parsed.id = id
     rset_key = "__rset_%s" % id
     try:
         self_rsid = self.fetch_data(session, rset_key)
     except ObjectDoesNotExistException:
         # The query has no associated result set; nothing to record.
         pass
     else:
         self.resultSetId = self_rsid
     return parsed
Ejemplo n.º 8
0
    def process_scan(self, opts, result):
        """Scan the index named by ``opts['scanClause']`` into ``result``.

        Reads ``maximumTerms`` (default 20) and ``responsePosition``
        (default 0) from ``opts``, stores the XCQL form of the parsed clause
        in ``opts['xQuery']`` and appends a terms element, with one term per
        scanned entry, to ``result``.  Relies on a ``session`` name from the
        enclosing scope.

        Raises the object built by ``self.diagnostic`` with code 7 when
        ``scanClause`` is missing, and with code 120 when the response
        position lies outside ``0 .. maximumTerms + 1``.

        Returns ``result``.
        """
        db = session.config.parent
        session.database = db.id

        if 'scanClause' not in opts:
            raise self.diagnostic(7,
                                  msg="Mandatory parameter not supplied",
                                  details='scanClause')
        q = cqlParser.parse(opts['scanClause'])
        opts['xQuery'] = etree.XML(q.toXCQL())

        mt = opts.get('maximumTerms', 20)
        rp = opts.get('responsePosition', 0)
        if rp < 0 or rp > (mt + 1):
            raise self.diagnostic(120,
                                  msg="Response position out of range",
                                  details=str(rp))

        if not q.term.value:
            # An empty term is replaced by chr(0) before scanning.
            q.term.value = chr(0)

        q.config = session.config

        if rp == 1:
            data = db.scan(session, q, mt, direction=">=")
        elif rp == 0:
            data = db.scan(session, q, mt, direction=">")
        elif rp == mt:
            data = db.scan(session, q, mt, direction="<=")
            data.reverse()
        elif rp == mt + 1:
            data = db.scan(session, q, mt, direction="<")
            data.reverse()
        else:
            # Need to go up and down
            data1 = db.scan(session, q, mt - rp + 1, direction=">=")
            data = db.scan(session, q, rp, direction="<=")
            # Both scans can start on the same term; drop the duplicate.
            # Either scan may be empty, so guard before indexing.
            if data1 and data and data1[0][0] == data[0][0]:
                data = data[1:]
            data.reverse()
            data.extend(data1)

        terms = elemFac.terms()
        for d in data:
            t = self.term(value=d[0], num=d[1][1])
            self.extraData('term', opts, t, d)
            terms.append(t)
        result.append(terms)
        return result
Ejemplo n.º 9
0
 def fetch_query(self, session, id):
     """Fetch query data, parse it into a query object and return it.

     Raises ObjectDeletedException when the stored data is a
     ``DeletedObject`` and ObjectDoesNotExistException when nothing (or an
     empty value) is stored under ``id``.

     If data is stored under ``__rset_<id>`` it is recorded on
     ``self.resultSetId``; a missing entry is ignored.
     """
     data = self.fetch_data(session, id)
     # Test for the deletion marker first: checking truthiness first would
     # send a truthy DeletedObject to the parser and never reach this branch.
     if isinstance(data, DeletedObject):
         raise ObjectDeletedException(data)
     if not data:
         raise ObjectDoesNotExistException(id)
     q = cqlParser.parse(data)
     q.id = id
     try:
         rsid = self.fetch_data(session, "__rset_%s" % id)
     except ObjectDoesNotExistException:
         # No result set stored for this query.
         pass
     else:
         self.resultSetId = rsid
     return q
Ejemplo n.º 10
0
    def process_scan(self, opts, result):
        """Handle a scan request described by ``opts``; fill ``result``.

        ``opts['scanClause']`` is parsed as CQL and its XCQL form is stored
        in ``opts['xQuery']``.  ``maximumTerms`` defaults to 20 and
        ``responsePosition`` to 0.  A terms element holding one term per
        scanned entry is appended to ``result``, which is returned.  The
        ``session`` used comes from the enclosing scope.

        Raises ``self.diagnostic(7, ...)`` when ``scanClause`` is absent and
        ``self.diagnostic(120, ...)`` when the response position is outside
        ``0 .. maximumTerms + 1``.
        """
        db = session.config.parent
        session.database = db.id

        if 'scanClause' in opts:
            q = cqlParser.parse(opts['scanClause'])
            opts['xQuery'] = etree.XML(q.toXCQL())
        else:
            raise self.diagnostic(7, msg="Mandatory parameter not supplied",
                                  details='scanClause')

        mt = opts.get('maximumTerms', 20)
        rp = opts.get('responsePosition', 0)
        if rp < 0 or rp > (mt + 1):
            raise self.diagnostic(120, msg="Response position out of range",
                                  details=str(rp))

        if not q.term.value:
            # Scan with chr(0) as the term when none was given.
            q.term.value = chr(0)

        q.config = session.config

        if rp == 1:
            data = db.scan(session, q, mt, direction=">=")
        elif rp == 0:
            data = db.scan(session, q, mt, direction=">")
        elif rp == mt:
            data = db.scan(session, q, mt, direction="<=")
            data.reverse()
        elif rp == mt + 1:
            data = db.scan(session, q, mt, direction="<")
            data.reverse()
        else:
            # Need to go up and down
            upward = db.scan(session, q, mt - rp + 1, direction=">=")
            data = db.scan(session, q, rp, direction="<=")
            # Guard against empty scans before comparing the first entries;
            # unguarded indexing raised IndexError when either was empty.
            if upward and data and upward[0][0] == data[0][0]:
                # Same term heads both lists: keep only one copy.
                data = data[1:]
            data.reverse()
            data.extend(upward)

        terms = elemFac.terms()
        for d in data:
            t = self.term(value=d[0], num=d[1][1])
            self.extraData('term', opts, t, d)
            terms.append(t)
        result.append(terms)
        return result
Ejemplo n.º 11
0
 def fetch_query(self, session, id):
     """Fetch query data, parse it into a query object and return it.

     Raises:
         ObjectDeletedException: the data stored for ``id`` is a
             ``DeletedObject``.
         ObjectDoesNotExistException: no (or empty) data is stored for
             ``id``.

     When a ``__rset_<id>`` entry exists its value is saved on
     ``self.resultSetId``; otherwise that attribute is not touched.
     """
     stored = self.fetch_data(session, id)
     if isinstance(stored, DeletedObject):
         # Checked before truthiness so this branch is reachable even when
         # DeletedObject instances evaluate as true.
         raise ObjectDeletedException(stored)
     elif not stored:
         raise ObjectDoesNotExistException(id)
     q = cqlParser.parse(stored)
     q.id = id
     try:
         rsid = self.fetch_data(session, "__rset_%s" % id)
     except ObjectDoesNotExistException:
         # The query has no stored result set.
         pass
     else:
         self.resultSetId = rsid
     return q
Ejemplo n.º 12
0
    def parse(self, session, data, codec, db):
        """Assemble a CQL query from numbered form fields and parse it.

        ``data`` must support ``in`` and ``getfirst()``.  For each clause
        number *i* (from 1) for which ``fieldcont<i>`` is present:

        * ``fieldcont<i>`` holds the search terms
        * ``fieldidx<i>`` holds ``||``-separated index names
          (default ``cql.anywhere``)
        * ``fieldrel<i>`` holds the relation
          (default ``all/relevant/proxinfo``)
        * ``fieldbool<i>`` holds the boolean linking clause *i* with the
          next one (default ``and/relevant/proxinfo``)

        Phrases matched by ``self.phraseRe`` are turned into separate
        ``=/relevant/proxinfo`` clauses, except for exact or ``/string``
        relations where double quotes are escaped instead.

        ``session``, ``codec`` and ``db`` are unused.  Returns the result of
        ``cql.parse`` on the query string, which is decoded with the form's
        ``_charset_`` (default ``utf-8``) and re-encoded as UTF-8.
        """
        form = data
        bools = []
        i = 1
        while 'fieldcont{0}'.format(i) in form:
            boolean = form.getfirst('fieldbool{0}'.format(i - 1),
                                    'and/relevant/proxinfo')
            bools.append(boolean)
            i += 1

        qClauses = []
        i = 1
        while 'fieldcont{0}'.format(i) in form:
            cont = form.getfirst('fieldcont{0}'.format(i))
            idxs = unquote(
                form.getfirst('fieldidx{0}'.format(i), 'cql.anywhere'))
            rel = unquote(
                form.getfirst('fieldrel{0}'.format(i),
                              'all/relevant/proxinfo'))
            # in case they're trying to do phrase searching
            if (rel.startswith('exact') or rel.startswith('=')
                    or rel.find('/string') != -1):
                # don't allow phrase searching for exact or /string searches
                cont = cont.replace('"', '\\"')

            if rel.startswith('all'):
                subBool = ' and/relevant/proxinfo '
            else:
                subBool = ' or/relevant/proxinfo '

            # Pull the phrases out once, before iterating over the indexes.
            # Previously this happened inside the loop and rewrote ``cont``
            # on the first pass, so only the first index got phrase clauses.
            if 'exact' in rel or '=' in rel or '/string' in rel:
                # we already did quote escaping; no phrase clauses
                phrases = []
            else:
                phrases = self.phraseRe.findall(cont)
                cont = self.phraseRe.sub('', cont)

            idxClauses = []
            for idx in idxs.split('||'):
                subClauses = [
                    '({0} =/relevant/proxinfo {1})'.format(idx, ph)
                    for ph in phrases
                ]
                if idx and rel and cont:
                    subClauses.append('{0} {1} {2}'.format(
                        idx, rel, cont.strip()))
                if subClauses:
                    idxClauses.append('({0})'.format(subBool.join(subClauses)))

            qClauses.append('({0})'.format(
                ' or/rel.combine=sum/proxinfo '.join(idxClauses)))
            # if there's another clause and a corresponding boolean
            try:
                qClauses.append(bools[i])
            except IndexError:
                # Ran out of booleans: the last clause has been added.
                break

            i += 1

        qString = ' '.join(qClauses)
        formcodec = form.getfirst('_charset_', 'utf-8')
        return cql.parse(qString.decode(formcodec).encode('utf-8'))
Ejemplo n.º 13
0
 def parse(self, session, data, codec, db):
     """Parse ``data`` as CQL and return the resulting query object.

     ``session``, ``codec`` and ``db`` are accepted but not used here.
     """
     # XXX check codec, turn into unicode first
     parsed = cql.parse(data)
     return parsed
Ejemplo n.º 14
0
def resultSetFacetsHandler(session, val, resp, resultSet=None, db=None):
    """Build the facets element for the indexes named in a CQL query.

    ``val`` is a CQL query.  The booleans in it are meaningless: facets are
    returned for each clause.  The term of each clause is meaningless too
    and need be nothing more than ``*``.  ``resp`` is not used.

    The result looks something like a browse response, e.g.::

        <facets>
            <facetsByIndex index="dc.subject" relation="exact">
                <terms>
                    <term>
                        <value>Genetics</value>
                        <numberOfRecords>2</numberOfRecords>
                    </term>
                    ...
                </terms>
            </facetsByIndex>
            ...
        </facets>

    Returns None when ``resultSet`` is empty or missing, or when ``db`` is
    None; otherwise the facets element.  If ``val`` cannot be parsed, or a
    clause names an index that cannot be resolved, the corresponding
    diagnostic is embedded in the returned XML rather than raised.
    """
    # quick escapes
    if resultSet is None or not len(resultSet) or db is None:
        return None

    myNamespaces = namespaces.copy()
    myNamespaces['fct'] = "info:srw/extension/2/facets-1.0"

    pm = db.get_path(session, 'protocolMap')
    if not pm:
        db._cacheProtocolMaps(session)
        pm = db.protocolMaps.get('http://www.loc.gov/zing/srw/')
        # The map is not cached here: the earlier attempt assigned to
        # ``self.paths``, but this is a plain function with no ``self`` and
        # that line raised NameError.

    fctElemFac = ElementMaker(namespace=myNamespaces['fct'],
                              nsmap=myNamespaces)

    def getFacets(query):
        """Return a list of facetsByIndex elements, one per search clause."""
        if not isinstance(query, cql.SearchClause):
            # Boolean node: collect the facets from both operands.
            fctEls = getFacets(query.leftOperand)
            fctEls.extend(getFacets(query.rightOperand))
            return fctEls

        fctEl = fctElemFac.facetsByIndex({
            'index': query.index.toCQL(),
            'relation': query.relation.toCQL()
        })
        idx = pm.resolveIndex(session, query)
        if idx is None:
            fctEl.append(
                diagnosticToXML(
                    cql.Diagnostic(code=16,
                                   message="Unsupported Index",
                                   details=query.index.toCQL())))
            # Always return a list: callers extend() and iterate the result,
            # which for a bare element would walk its children instead.
            return [fctEl]

        try:
            facets = idx.facets(session, resultSet)
        except Exception:
            # index doesn't support facets
            # TODO: diagnostic?
            facets = []

        termsEl = sruElemFac.terms()
        for f in facets:
            termsEl.append(
                sruElemFac.term(sruElemFac.value(f[0]),
                                sruElemFac.numberOfRecords(str(f[1][1]))))

        fctEl.append(termsEl)
        return [fctEl]

    fctsEl = fctElemFac.facets()
    try:
        query = cql.parse(val)
    except cql.Diagnostic as d:
        fctsEl.append(diagnosticToXML(d))
        return fctsEl

    for el in getFacets(query):
        fctsEl.append(el)

    return fctsEl
Ejemplo n.º 15
0
    def handleScan(self, session, data):
        """Process a scan request and populate a ScanResponse.

        ``data`` supplies ``databaseNames`` (exactly one is supported),
        ``numberOfTermsRequested``, ``preferredPositionInResponse``,
        ``termListAndStartPoint`` and optionally ``stepSize`` (0 if absent).
        The start point is converted to a CQL clause and scanned against the
        named database; every ``stepSize + 1``-th entry becomes a TermInfo
        in the response.

        Any exception raised while processing is converted into a
        non-surrogate diagnostic (condition 123) on the response and the
        scan status is set to 6.
        """
        if hasattr(data, 'stepSize'):
            step = data.stepSize
        else:
            step = 0
        resp = ScanResponse()
        resp.stepSize = step
        resp.scanStatus = 1
        resp.numberOfEntriesReturned = 0
        resp.positionOfTerm = 0

        try:
            dbs = data.databaseNames
            if len(dbs) != 1:
                # Can only scan one db at once? (XXX)
                raise ValueError
            nt = data.numberOfTermsRequested
            rp = data.preferredPositionInResponse
            if rp < 0 or rp > (nt + 1):
                # Busted numbers (XXX)
                raise ValueError
            dbname = dbs[0]
            cfg = self.session.configs.get(dbname, None)
            db = cfg.parent
            session.database = db.id
            where = data.termListAndStartPoint
            # Make it look like part of an RPN query...
            w = ('op', ('attrTerm', where))
            clause = CQLUtils.rpn2cql(w, cfg)
            if not clause.term.value:
                clause.term.value = 'a'
            nstms = nt * (step + 1)
            terms = []
            clause = cqlParser.parse(clause.toCQL())
            # The position is compared against ``nt``; these branches used
            # an undefined name ``mt``, which raised NameError for every
            # position other than 0 and 1.
            if rp == 1:
                data = db.scan(session, clause, nstms, direction=">=")
            elif rp == 0:
                data = db.scan(session, clause, nstms, direction=">")
            elif rp == nt:
                data = db.scan(session, clause, nstms, direction="<=")
                data.reverse()
            elif rp == nt + 1:
                data = db.scan(session, clause, nstms, direction="<")
                data.reverse()
            else:
                # Need to go up and down
                data1 = db.scan(session, clause, nt - rp + 1, direction=">=")
                data = db.scan(session, clause, rp, direction="<=")
                # Drop the term shared by both scans; guard against either
                # scan returning nothing before indexing.
                if data1 and data and data1[0][0] == data[0][0]:
                    data = data[1:]
                data.reverse()
                data.extend(data1)

            for d in data[::step + 1]:
                t = TermInfo()
                t.term = ('general', d[0])
                t.globalOccurrences = d[1][1]
                terms.append(('termInfo', t))
            resp.positionOfTerm = rp
            resp.numberOfEntriesReturned = len(terms)
            resp.scanStatus = 0
            l = ListEntries()
            l.entries = terms
            resp.entries = l
        except Exception as err:
            # Report every failure as a diagnostic instead of propagating.
            l = ListEntries()
            d = self.generate_diagnostic(err)
            d.condition = 123
            diag = [('defaultFormat', d)]
            l.nonsurrogateDiagnostics = diag
            resp.entries = l
            resp.numberOfEntriesReturned = 0
            resp.scanStatus = 6
        # NOTE(review): the visible code ends here without returning or
        # encoding ``resp`` -- confirm whether a return statement is missing.
Ejemplo n.º 16
0
    def handleSearch(self, session, data):
        """Process a search request and populate a SearchResponse.

        The query in ``data.query`` is normalised to either CQL or RPN
        according to its type tag (``type_0`` is assumed to be CQL,
        ``type_1``/``type_101`` RPN, ``type_2`` CCL converted to RPN,
        ``type_104`` external CQL).  SQL and the ``type_100``/``type_102``
        queries raise NotImplementedError.  The query is run against every
        database in ``data.databaseNames``, the result sets are combined,
        then stored under ``data.resultSetName`` and placed in the
        session's result set cache.

        When no result set is produced the encoded default response is
        returned.  Any exception raised while processing is converted into
        a non-surrogate diagnostic on the response.
        """
        # Must return a response no matter what
        resp = SearchResponse()
        resp.resultCount = 0
        resp.numberOfRecordsReturned = 0
        resp.nextResultSetPosition = 1
        resp.searchStatus = 1
        resp.resultSetStatus = 1
        resp.presentStatus = PresentStatus.get_num_from_name('failure')

        try:
            queryType = data.query[0]
            query = ["", ""]
            if queryType in ['type_1', 'type_101']:
                zQuery = data.query[1]
                attrset = zQuery.attributeSet
                query = ['rpn', zQuery.rpn]
            elif queryType == 'type_0':
                # A Priori external. We assume CQL
                query = ['cql', data.query[1]]
            elif queryType == 'type_2':
                # ISO8777  (CCL)
                rpn = ccl.mk_rpn_query(data.query[1])
                query = ['rpn', rpn]
            elif queryType == 'type_104':
                # Look for CQL or SQL
                type104 = data.query[1].direct_reference
                if type104 == Z3950_QUERY_CQL_ov:
                    query = ['cql', data.query[1].encoding[1]]
                elif type104 == Z3950_QUERY_SQL_ov:
                    query = ['sql', data.query[1].encoding[1].queryExpression]
                    # XXX Implement direct to postgres
                    raise NotImplementedError
                else:
                    # Undefined query type
                    raise NotImplementedError
            elif queryType in ['type_102', 'type_100']:
                # 102: Ranked List, not yet /defined/ let alone implemented
                # 100: Z39.58 query (Standard was withdrawn)
                raise NotImplementedError

            rsetname = data.resultSetName
            dbs = data.databaseNames
            resultSets = []
            if query[0] == 'cql':
                q = CQLParser.parse(query[1])
            for dbname in dbs:
                cfg = self.session.configs.get(dbname, None)
                if cfg is not None:
                    db = cfg.parent
                    if query[0] == 'rpn':
                        self.log("Trying to convert: %s" % (repr(query[1])))
                        q = CQLUtils.rpn2cql(query[1], cfg)
                        self.log("--> " + q.toCQL())
                    session.database = db.id
                    # Round-trip through CQL text and re-parse with cqlParser
                    q = cqlParser.parse(q.toCQL())
                    resultSets.append(db.search(session, q))
                else:
                    raise ValueError("%s not in %r" %
                                     (dbname, self.session.configs.keys()))
            if len(resultSets) > 1:
                rs = resultSets[0]
                for r in resultSets[1:]:
                    rs.combine(r)
            elif len(resultSets) == 1:
                rs = resultSets[0]
            else:
                # No resultset.
                return self.encode(('searchResponse', resp))

            resp.resultCount = len(rs)
            # Maybe put it into our DB
            if rsetname in session.resultSets:
                rsid = session.resultSets[rsetname]
                rs.id = rsid
                session.resultSetStore.store_resultSet(session, rs)
            else:
                rsid = session.resultSetStore.create_resultSet(session, rs)
                session.resultSets[rsetname] = rsid
            # only keep 4 at once
            # list() keeps the keys indexable where keys() returns a view
            keys = list(session.resultSetCache.keys())
            if len(keys) > 3:
                # delete one at random
                r = rand.randint(0, 3)
                del session.resultSetCache[keys[r]]
            session.resultSetCache[rsid] = rs

        except Exception as err:
            # XXX add -correct- diagnostic
            resp.numberOfRecordsReturned = 1
            resp.nextResultSetPosition = 0
            resp.resultSetStatus = 3
            d = self.generate_diagnostic(err)
            diag = ('nonSurrogateDiagnostic', d)
            resp.records = diag
        # NOTE(review): apart from the "no resultset" path, the visible code
        # never returns or encodes ``resp`` -- confirm whether the tail of
        # this method is missing.
Ejemplo n.º 17
0
def process_scan(self, session, req):
    """Process a scan request and collect the scanned terms on ``self``.

    Sets ``session.database`` from ``req.config``, parses
    ``req.scanClause`` as CQL, stores its XCQL serialisation on
    ``req.xScanClause`` and echoes the request on
    ``self.echoedScanRequest``.  If the request already carries
    diagnostics they are copied to ``self.diagnostics`` and processing
    stops.  Otherwise ``self.terms`` is filled with one ScanTerm per
    scanned entry and the term, scan and response extension handlers
    from the config are run.

    Raises Diagnostic7 when ``version`` or ``scanClause`` is missing and
    Diagnostic120 when the response position is outside
    ``0 .. maximumTerms + 1``.
    """
    config = req.config
    db = config.parent
    session.database = db.id

    self.terms = []
    if not req.version:
        diag = Diagnostic7()
        diag.message = "Mandatory 'version' parameter not supplied"
        diag.details = 'version'
        raise diag

    if not req.scanClause:
        # Seriously broken request.
        f = Diagnostic7()
        f.message = 'Request must include a query'
        f.details = 'scanClause'
        raise f

    # convert clause into SearchClause object
    clause = CQLParser.parse(req.scanClause)
    # The echoed clause is the concatenated XCQL of index, relation, term.
    req.xScanClause = "".join([
        clause.index.toXCQL(),
        clause.relation.toXCQL(),
        clause.term.toXCQL(),
    ])

    self.echoedScanRequest = req
    if req.diagnostics:
        self.diagnostics = req.diagnostics
        return

    mt = req.get('maximumTerms')
    rp = req.get('responsePosition')
    if rp < 0 or rp > (mt + 1):
        f = Diagnostic120()
        f.message = "Response position out of range"
        f.details = str(rp)
        raise f

    if not clause.term.value:
        # An empty term is replaced by chr(0) before scanning.
        clause.term.value = chr(0)

    clause.config = config

    if rp == 1:
        data = db.scan(session, clause, mt, direction=">=")
    elif rp == 0:
        data = db.scan(session, clause, mt, direction=">")
    elif rp == mt:
        data = db.scan(session, clause, mt, direction="<=")
        data.reverse()
    elif rp == mt + 1:
        data = db.scan(session, clause, mt, direction="<")
        data.reverse()
    else:
        # Need to go up and down
        data1 = db.scan(session, clause, mt - rp + 1, direction=">=")
        data = db.scan(session, clause, rp, direction="<=")
        # Remove the term shared by both scans.  Either scan can be empty,
        # so check before indexing to avoid an IndexError.
        if data1 and data and data1[0][0] == data[0][0]:
            data = data[1:]
        data.reverse()
        data.extend(data1)

    for d in data:
        t = SRW.types.ScanTerm('ScanTerm')
        t.value = d[0]
        t.numberOfRecords = d[1][1]
        process_extraData(config.termExtensionHash, req, t, d)
        self.terms.append(t)
    process_extraData(config.scanExtensionHash, req, self)
    process_extraData(config.responseExtensionHash, req, self)
Ejemplo n.º 18
0
 def parse(self, session, data, codec, db):
     """Turn the form ``data`` into a CQL string and return the parsed query.

     The CQL text is produced by ``generate_cqlQuery``; ``session``,
     ``codec`` and ``db`` are not used.
     """
     return cql.parse(generate_cqlQuery(data))
Ejemplo n.º 19
0
def resultSetFacetsHandler(session, val, resp, resultSet=None, db=None):
    """Put facets for the requested indexes into extraSearchRetrieveData.

    ``val`` is a CQL query.  The boolean used is meaningless: facets are
    returned for each clause.  The term in each clause is also meaningless
    and need be nothing more than ``*``.  ``resp`` is not used.

    The result looks something like a browse response, e.g.::

        <facets>
            <facetsByIndex index="dc.subject" relation="exact">
                <terms>
                    <term>
                        <value>Genetics</value>
                        <numberOfRecords>2</numberOfRecords>
                    </term>
                    ...
                </terms>
            </facetsByIndex>
            ...
        </facets>

    Returns None if ``resultSet`` is empty or None, or if ``db`` is None.
    Otherwise returns the facets element; parse failures and unsupported
    indexes are reported as diagnostics inside that element.
    """
    # quick escapes
    if resultSet is None or not len(resultSet) or db is None:
        return None

    myNamespaces = namespaces.copy()
    myNamespaces['fct'] = "info:srw/extension/2/facets-1.0"

    pm = db.get_path(session, 'protocolMap')
    if not pm:
        db._cacheProtocolMaps(session)
        pm = db.protocolMaps.get('http://www.loc.gov/zing/srw/')
        # No caching of ``pm``: the former ``self.paths[...] = pm`` used an
        # undefined ``self`` in this module-level function (NameError).

    fctElemFac = ElementMaker(namespace=myNamespaces['fct'], nsmap=myNamespaces)

    def getFacets(query):
        """Return a list with one facetsByIndex element per search clause."""
        if isinstance(query, cql.SearchClause):
            fctEl = fctElemFac.facetsByIndex({'index': query.index.toCQL(),
                                              'relation': query.relation.toCQL()})
            idx = pm.resolveIndex(session, query)
            if idx is None:
                fctEl.append(diagnosticToXML(
                    cql.Diagnostic(code=16,
                                   message="Unsupported Index",
                                   details=query.index.toCQL())))
                # Wrapped in a list like the success path; returning the
                # bare element made callers iterate over its children.
                return [fctEl]

            try:
                facets = idx.facets(session, resultSet)
            except Exception:
                # index doesn't support facets
                # TODO: diagnostic?
                facets = []

            termsEl = sruElemFac.terms()
            for f in facets:
                termsEl.append(sruElemFac.term(
                    sruElemFac.value(f[0]),
                    sruElemFac.numberOfRecords(str(f[1][1]))))

            fctEl.append(termsEl)
            return [fctEl]
        else:
            # Boolean node: gather the facets of both operands.
            fctEls = getFacets(query.leftOperand)
            fctEls.extend(getFacets(query.rightOperand))
            return fctEls

    fctsEl = fctElemFac.facets()
    try:
        query = cql.parse(val)
    except cql.Diagnostic as d:
        fctsEl.append(diagnosticToXML(d))
        return fctsEl

    for el in getFacets(query):
        fctsEl.append(el)

    return fctsEl
Ejemplo n.º 20
0
 def parse(self, session, data, codec, db):
     """Return the parsed CQL query generated from the form ``data``.

     ``generate_cqlQuery`` builds the CQL string; ``session``, ``codec``
     and ``db`` are accepted but unused.
     """
     cql_text = generate_cqlQuery(data)
     parsed_query = cql.parse(cql_text)
     return parsed_query
Ejemplo n.º 21
0
def process_searchRetrieve(self, session, req):
    """Execute a searchRetrieve request and fill in this response object.

    Parses ``req.query`` as CQL, searches the database that owns
    ``req.config`` and stores the outcome on ``self``: ``numberOfRecords``,
    ``records`` and, when applicable, ``nextRecordPosition``,
    ``resultSetId`` and ``resultSetIdleTime``.  The request is kept on
    ``self.echoedSearchRetrieveRequest``.

    If ``req.diagnostics`` is non-empty after ``req.parseSortKeys()`` the
    diagnostics are copied to ``self.diagnostics`` and the method returns
    before searching.

    Raises:
        Diagnostic7: ``version`` is missing or ``query`` is empty.
        Diagnostic66: the requested record schema is not recognised.
        Diagnostic71: the record packing is neither "string" nor "xml".
    """
    if (not req.version):
        diag = Diagnostic7()
        diag.message = "Mandatory 'version' parameter not supplied"
        diag.details = 'version'
        raise diag

    # Get our config based on URL
    config = req.config
    db = config.parent
    session.database = db.id

    rss = db.get_object(session, 'defaultResultSetStore')

    # Setup for processing
    if (req.query != ""):
        req.queryStructure = CQLParser.parse(req.query)
    else:
        # No Query, Request is seriously Broken
        f = Diagnostic7()
        f.message = 'Request must include a query'
        f.details = 'query'
        raise f
    req.queryStructure.config = config

    req.xQuery = req.queryStructure.toXCQL()
    self.echoedSearchRetrieveRequest = req
    req.parseSortKeys()

    if (req.diagnostics):
        self.diagnostics = req.diagnostics
        return

    # Check if we recognise the record Schema
    schema = req.get('recordSchema')
    # Redirect to full value
    if schema in config.recordNamespaces:
        schema = config.recordNamespaces[schema]
    if schema not in config.recordNamespaces.values():
        diag = Diagnostic66()
        diag.details = schema
        raise diag

    txr = config.transformerHash.get(schema, None)

    recordPacking = req.get('recordPacking')
    if recordPacking not in ["string", "xml"]:
        diag = Diagnostic71()
        # Report the value that was actually validated above
        diag.details = recordPacking
        raise diag

    # Fencepost.  SRW starts at 1, C3 starts at 0
    startRecord = req.get('startRecord') - 1
    maximumRecords = req.get('maximumRecords')
    ttl = req.get('resultSetTTL')
    rsn = req.queryStructure.getResultSetId()
    rs = db.search(session, req.queryStructure)

    recs = []
    if (rs is not None):
        self.numberOfRecords = len(rs)
        if (ttl and not rsn):
            # Only store the result set when a TTL was requested and the
            # query did not already yield a result set id
            rs.expires = ttl
            rsn = rss.create_resultSet(session, rs)

        # Reset first so a failure mid-loop cannot leave stale records
        self.records = []
        end = min(startRecord + maximumRecords, len(rs))

        for rIdx in range(startRecord, end):
            rsi = rs[rIdx]
            r = rsi.fetch_record(session)
            ro = SRW.types.Record('record')
            ro.recordPacking = recordPacking
            ro.recordSchema = schema

            if (txr is not None):
                doc = txr.process_record(session, r)
                rec = doc.get_raw(session)
                rec = xmlver.sub("", rec)
            else:
                rec = r.get_xml(session)

            if recordPacking == "string":
                ro.recordData = escape(rec)
            else:
                ro.recordData = rec

            process_extraData(config.recordExtensionHash, req, ro, r)
            recs.append(ro)

        self.records = recs
        # Back to SRU 1-based recordPosition: `end` is the 1-based position
        # of the last record considered, so the next one is end + 1.  It
        # exists whenever it does not exceed the total (<=, not <, otherwise
        # the position is dropped when exactly one record remains).
        nrp = end + 1
        if 0 < nrp <= self.numberOfRecords:
            self.nextRecordPosition = nrp
        if (rsn):
            self.resultSetId = rsn
            self.resultSetIdleTime = ttl
    else:
        self.numberOfRecords = 0

    self.extraResponseData = []    # empty to prevent data from previous requests
    process_extraData(config.searchExtensionHash, req, self, rs)
    process_extraData(config.responseExtensionHash, req, self)
Ejemplo n.º 22
0
    def process_searchRetrieve(self, opts, result):
        """Run a searchRetrieve request and append the response to ``result``.

        ``opts`` holds the request parameters (``query``, ``recordSchema``,
        ``recordPacking``, ``startRecord``, ``maximumRecords``,
        ``resultSetTTL``); the parsed query is added back to it as
        ``opts['xQuery']``.  ``result`` receives ``numberOfRecords`` and,
        when the search matched anything, the optional ``resultSetId`` /
        ``resultSetIdleTime``, the ``records`` and the optional
        ``nextRecordPosition``.

        Returns ``result``.

        Raises the object built by ``self.diagnostic`` with code:
            7  - no ``query`` supplied
            10 - the query's result set id could not be determined
            16 - the search raised ``ObjectDoesNotExistException``
            24 - the search raised ``QueryException``
            66 - unknown record schema
            71 - unsupported record packing
        """
        session = self.session
        if 'query' in opts:
            q = cqlParser.parse(opts['query'])
            q.config = session.config
            opts['xQuery'] = etree.XML(q.toXCQL())
        else:
            raise self.diagnostic(7,
                                  msg="Mandatory parameter not supplied",
                                  details='query')

        db = session.config.parent
        session.database = db.id
        rss = db.get_object(session, 'defaultResultSetStore')

        recordMap.update(session.config.recordNamespaces)
        schema = opts.get('recordSchema', '')
        if not schema and hasattr(session.config, 'defaultRetrieveSchema'):
            schema = session.config.defaultRetrieveSchema
        if (schema in recordMap):
            # Redirect short name to the full schema identifier
            schema = recordMap[schema]
        if (schema
                and schema not in session.config.recordNamespaces.values()):
            raise self.diagnostic(66,
                                  msg="Unknown schema for retrieval",
                                  details=schema)
        txr = session.config.transformerHash.get(schema, None)

        recordPacking = opts.get('recordPacking', 'xml')
        if recordPacking not in ["string", "xml"]:
            raise self.diagnostic(71,
                                  msg="Unsupported record packing",
                                  details=recordPacking)

        # Fencepost.  SRW starts at 1, C3 starts at 0
        startRecord = opts.get('startRecord', 1) - 1

        maximumRecords = opts.get('maximumRecords', -1)
        if maximumRecords < 0:
            # Negative means "not supplied": fall back to the configured
            # default, or a single record when none is configured
            if hasattr(session.config, 'defaultNumberOfRecords'):
                maximumRecords = session.config.defaultNumberOfRecords
            else:
                maximumRecords = 1
        ttl = opts.get('resultSetTTL', 0)

        try:
            rsn = q.getResultSetId()
        except c3errors.ConfigFileException as e:
            d = self.diagnostic(10, msg='Query syntax error.')
            if e.reason == "Zeerex does not have default context set.":
                d.message = ('Query syntax error. Database has no default '
                             'context set for indexes. You must supply a '
                             'context set for each index.')
            raise d

        try:
            rs = db.search(session, q)
        except c3errors.ObjectDoesNotExistException as e:
            raise self.diagnostic(16,
                                  msg='Unsupported index',
                                  details=e.reason)
        except c3errors.QueryException as e:
            raise self.diagnostic(24,
                                  msg='Unsupported combination of relation '
                                  'and term',
                                  details=e.reason)
        session.currentResultSet = rs
        result.append(elemFac.numberOfRecords(str(len(rs))))
        if (len(rs)):
            recs = elemFac.records()
            if (ttl and not rsn):
                rs.expires = ttl
                rsn = rss.create_resultSet(session, rs)
            end = min(startRecord + maximumRecords, len(rs))

            for rIdx in range(startRecord, end):
                rsi = rs[rIdx]
                try:
                    r = rsi.fetch_record(session)
                except c3errors.ObjectDeletedException:
                    # Emit a diagnostic record in place of the deleted one
                    diag = self.diagnostic(65, "Record deleted.", rsi.id)
                    rec = self.record(
                        schema='info:srw/schema/1/diagnostics-v1.1',
                        data=self.diagnosticToXml(diag),
                        identifier=str(rsi),
                        position=rIdx + 1)
                else:
                    if (txr is not None):
                        doc = txr.process_record(session, r)
                        xml = doc.get_raw(session)
                    else:
                        xml = r.get_xml(session)
                    xml = xmlVerRe.sub("", xml)
                    # Fencepost. SRW starts at 1, C3 starts at 0
                    rec = self.record(schema=schema,
                                      packing=recordPacking,
                                      data=xml,
                                      identifier=str(rsi),
                                      position=rIdx + 1)
                    self.extraData('record', opts, rec, rsi, r)
                recs.append(rec)

            if rsn:
                result.append(elemFac.resultSetId(rsn))
                result.append(elemFac.resultSetIdleTime(str(ttl)))
            result.append(recs)
            # `end` is the 1-based position of the last record considered,
            # so the next position is end + 1.  It is valid whenever it does
            # not exceed the total (<=, not <, otherwise it is omitted when
            # exactly one record remains).
            nrp = end + 1
            if 0 < nrp <= len(rs):
                result.append(elemFac.nextRecordPosition(str(nrp)))
        self.extraData('searchRetrieve', opts, result, rs, db)
        return result
Ejemplo n.º 23
0
    def handleScan(self, session, data):
        """Handle a scan request and populate a ``ScanResponse``.

        ``data`` supplies ``databaseNames`` (exactly one is accepted),
        ``numberOfTermsRequested``, ``preferredPositionInResponse``,
        ``termListAndStartPoint`` and optionally ``stepSize``.  The start
        point is converted to a CQL clause and the database index is
        scanned forwards, backwards or both depending on the preferred
        position.

        Any exception is converted into a non-surrogate diagnostic on the
        response (``scanStatus`` 6) rather than propagated.

        NOTE(review): the visible code builds ``resp`` but never encodes or
        returns it - confirm whether the caller expects a return value.
        """
        if (hasattr(data, 'stepSize')):
            step = data.stepSize
        else:
            step = 0
        resp = ScanResponse()
        resp.stepSize = step
        resp.scanStatus = 1
        resp.numberOfEntriesReturned = 0
        resp.positionOfTerm = 0

        try:
            dbs = data.databaseNames
            if len(dbs) != 1:
                # Can only scan one db at once? (XXX)
                raise ValueError
            nt = data.numberOfTermsRequested
            rp = data.preferredPositionInResponse
            if (rp < 0 or rp > (nt + 1)):
                # Busted numbers (XXX)
                raise ValueError
            dbname = dbs[0]
            cfg = self.session.configs.get(dbname, None)
            db = cfg.parent
            session.database = db.id
            where = data.termListAndStartPoint
            # Make it look like part of an RPN query...
            w = ('op', ('attrTerm', where))
            clause = CQLUtils.rpn2cql(w, cfg)
            if not clause.term.value:
                clause.term.value = 'a'
            # Fetch enough terms that taking every (step + 1)-th still
            # yields the requested number
            nstms = nt * (step + 1)
            terms = []
            clause = cqlParser.parse(clause.toCQL())
            if (rp == 1):
                scanned = db.scan(session, clause, nstms, direction=">=")
            elif (rp == 0):
                scanned = db.scan(session, clause, nstms, direction=">")
            elif (rp == nt):
                # Was `mt` (undefined name): compare against the number of
                # terms requested
                scanned = db.scan(session, clause, nstms, direction="<=")
                scanned.reverse()
            elif (rp == nt + 1):
                scanned = db.scan(session, clause, nstms, direction="<")
                scanned.reverse()
            else:
                # Need to go up and down
                after = db.scan(session, clause, nt - rp + 1, direction=">=")
                scanned = db.scan(session, clause, rp, direction="<=")
                # Drop the first backward entry when both scans start on
                # the same term; guard against empty scan results
                if after and scanned and after[0][0] == scanned[0][0]:
                    scanned = scanned[1:]
                scanned.reverse()
                scanned.extend(after)

            for d in scanned[::step + 1]:
                t = TermInfo()
                t.term = ('general', d[0])
                t.globalOccurrences = d[1][1]
                terms.append(('termInfo', t))
            resp.positionOfTerm = rp
            resp.numberOfEntriesReturned = len(terms)
            resp.scanStatus = 0
            l = ListEntries()
            l.entries = terms
            resp.entries = l
        except Exception as err:
            l = ListEntries()
            d = self.generate_diagnostic(err)
            d.condition = 123
            diag = [('defaultFormat', d)]
            l.nonsurrogateDiagnostics = diag
            resp.entries = l
            resp.numberOfEntriesReturned = 0
            resp.scanStatus = 6
Ejemplo n.º 24
0
    def handleSearch(self, session, data):
        """Handle a search request and return the encoded search response.

        ``data.query`` is a ``(type, payload)`` pair.  RPN (type_1 /
        type_101), CCL (type_2, converted to RPN) and CQL (type_0, or
        type_104 with the CQL reference) queries are supported; the other
        recognised types raise ``NotImplementedError``.  The query is run
        against every database named in ``data.databaseNames`` and the
        result sets are combined, stored in the session's result set store
        under ``data.resultSetName`` and cached on the session.

        Any exception is converted into a non-surrogate diagnostic on the
        response instead of being propagated.

        Returns ``self.encode(('searchResponse', resp))`` on every path.
        """
        # Must return a response no matter what
        resp = SearchResponse()
        resp.resultCount = 0
        resp.numberOfRecordsReturned = 0
        resp.nextResultSetPosition = 1
        resp.searchStatus = 1
        resp.resultSetStatus = 1
        resp.presentStatus = PresentStatus.get_num_from_name('failure')

        try:
            queryType = data.query[0]
            query = ["", ""]
            if (queryType in ['type_1', 'type_101']):
                zQuery = data.query[1]
                query = ['rpn', zQuery.rpn]
            elif (queryType == 'type_0'):
                # A Priori external. We assume CQL
                query = ['cql', data.query[1]]
            elif (queryType == 'type_2'):
                # ISO8777  (CCL)
                rpn = ccl.mk_rpn_query(data.query[1])
                query = ['rpn', rpn]
            elif (queryType == 'type_104'):
                # Look for CQL or SQL
                type104 = data.query[1].direct_reference
                if (type104 == Z3950_QUERY_CQL_ov):
                    query = ['cql', data.query[1].encoding[1]]
                elif (type104 == Z3950_QUERY_SQL_ov):
                    query = ['sql', data.query[1].encoding[1].queryExpression]
                    # XXX Implement direct to postgres
                    raise NotImplementedError
                else:
                    # Undefined query type
                    raise NotImplementedError
            elif (queryType in ['type_102', 'type_100']):
                # 102: Ranked List, not yet /defined/ let alone implemented
                # 100: Z39.58 query (Standard was withdrawn)
                raise NotImplementedError

            rsetname = data.resultSetName
            dbs = data.databaseNames
            resultSets = []
            if query[0] == 'cql':
                q = CQLParser.parse(query[1])
            for dbname in dbs:
                cfg = self.session.configs.get(dbname, None)
                if cfg is not None:
                    db = cfg.parent
                    if query[0] == 'rpn':
                        # RPN is converted per database, using its config
                        self.log("Trying to convert: %s" % (repr(query[1])))
                        q = CQLUtils.rpn2cql(query[1], cfg)
                        self.log("--> " + q.toCQL())
                    session.database = db.id
                    # Round-trip through CQL text and re-parse
                    q = cqlParser.parse(q.toCQL())
                    resultSets.append(db.search(session, q))
                else:
                    raise ValueError("%s not in %r" % (dbname, self.session.configs.keys()))
            if len(resultSets) > 1:
                rs = resultSets[0]
                for r in resultSets[1:]:
                    rs.combine(r)
            elif len(resultSets) == 1:
                rs = resultSets[0]
            else:
                # No resultset.
                return self.encode(('searchResponse', resp))

            resp.resultCount = len(rs)
            # Maybe put it into our DB
            if rsetname in session.resultSets:
                # Re-use the id already associated with this name
                rsid = session.resultSets[rsetname]
                rs.id = rsid
                session.resultSetStore.store_resultSet(session, rs)
            else:
                rsid = session.resultSetStore.create_resultSet(session, rs)
                session.resultSets[rsetname] = rsid
            # only keep 4 at once
            keys = list(session.resultSetCache.keys())
            if len(keys) > 3:
                # delete one at random
                r = rand.randint(0, 3)
                del session.resultSetCache[keys[r]]
            session.resultSetCache[rsid] = rs

        except Exception as err:
            # XXX add -correct- diagnostic
            resp.numberOfRecordsReturned = 1
            resp.nextResultSetPosition = 0
            resp.resultSetStatus = 3
            d = self.generate_diagnostic(err)
            diag = ('nonSurrogateDiagnostic', d)
            resp.records = diag

        # Same encoding as the "no result set" path above, so that success
        # and failure both hand a response back to the caller.
        return self.encode(('searchResponse', resp))
Ejemplo n.º 25
0
 def parse(self, session, data, codec, db):
     """Parse ``data`` as CQL and return the parsed query.

     ``session``, ``codec`` and ``db`` are accepted but not used.
     """
     # XXX check codec, turn into unicode first
     parsed = cql.parse(data)
     return parsed
Ejemplo n.º 26
0
    def process_searchRetrieve(self, opts, result):
        """Run a searchRetrieve request and append the response to ``result``.

        ``opts`` holds the request parameters (``query``, ``recordSchema``,
        ``recordPacking``, ``startRecord``, ``maximumRecords``,
        ``resultSetTTL``); the parsed query is added back to it as
        ``opts['xQuery']``.  ``result`` receives ``numberOfRecords`` and,
        when the search matched anything, the optional ``resultSetId`` /
        ``resultSetIdleTime``, the ``records`` and the optional
        ``nextRecordPosition``.

        Returns ``result``.  Raises the object built by ``self.diagnostic``
        (codes 7, 10, 16, 24, 66, 71) for the corresponding request errors.

        NOTE(review): ``session`` is neither a parameter nor a local here;
        presumably it is a module-level name - confirm.
        """
        if 'query' in opts:
            q = cqlParser.parse(opts['query'])
            q.config = session.config
            opts['xQuery'] = etree.XML(q.toXCQL())
        else:
            raise self.diagnostic(7, msg="Mandatory parameter not supplied", details='query')

        db = session.config.parent
        session.database = db.id
        rss = db.get_object(session, 'defaultResultSetStore')

        recordMap.update(session.config.recordNamespaces)
        schema = opts.get('recordSchema', '')
        if not schema and hasattr(session.config, 'defaultRetrieveSchema'):
            schema = session.config.defaultRetrieveSchema
        if (schema in recordMap):
            # Redirect short name to the full schema identifier
            schema = recordMap[schema]
        if (schema and schema not in session.config.recordNamespaces.values()):
            raise self.diagnostic(66, msg="Unknown schema for retrieval", details=schema)
        txr = session.config.transformerHash.get(schema, None)

        recordPacking = opts.get('recordPacking', 'xml')
        if recordPacking not in ["string", "xml"]:
            raise self.diagnostic(71, msg="Unsupported record packing", details=recordPacking)

        # Fencepost.  SRW starts at 1, C3 starts at 0
        startRecord = opts.get('startRecord', 1) - 1

        maximumRecords = opts.get('maximumRecords', -1)
        if maximumRecords < 0:
            # Negative means "not supplied": fall back to the configured
            # default, or a single record when none is configured
            if hasattr(session.config, 'defaultNumberOfRecords'):
                maximumRecords = session.config.defaultNumberOfRecords
            else:
                maximumRecords = 1
        ttl = opts.get('resultSetTTL', 0)

        try:
            rsn = q.getResultSetId()
        except c3errors.ConfigFileException as e:
            d = self.diagnostic(10, msg='Query syntax error.')
            if e.reason == "Zeerex does not have default context set.":
                d.message = 'Query syntax error. Database has no default context set for indexes. You must supply a context set for each index.'
            raise d

        try:
            rs = db.search(session, q)
        except c3errors.ObjectDoesNotExistException as e:
            raise self.diagnostic(16, msg='Unsupported index', details=e.reason)
        except c3errors.QueryException as e:
            raise self.diagnostic(24, msg='Unsupported combination of relation and term', details=e.reason)

        session.currentResultSet = rs
        result.append(elemFac.numberOfRecords(str(len(rs))))

        if (len(rs)):
            recs = elemFac.records()
            if (ttl and not rsn):
                rs.expires = ttl
                rsn = rss.create_resultSet(session, rs)
            end = min(startRecord + maximumRecords, len(rs))

            for rIdx in range(startRecord, end):
                rsi = rs[rIdx]
                r = rsi.fetch_record(session)

                if (txr is not None):
                    doc = txr.process_record(session, r)
                    xml = doc.get_raw(session)
                else:
                    xml = r.get_xml(session)
                xml = xmlVerRe.sub("", xml)
                # Fencepost.  SRW starts at 1, C3 starts at 0
                rec = self.record(schema=schema, packing=recordPacking,
                                  data=xml, identifier=str(rsi), position=rIdx + 1)
                self.extraData('record', opts, rec, rsi, r)
                recs.append(rec)

            if rsn:
                result.append(elemFac.resultSetId(rsn))
                result.append(elemFac.resultSetIdleTime(str(ttl)))
            result.append(recs)

            # `end` is the 1-based position of the last record considered,
            # so the next position is end + 1.  It is valid whenever it does
            # not exceed the total (<=, not <, otherwise it is omitted when
            # exactly one record remains).
            nrp = end + 1
            if 0 < nrp <= len(rs):
                result.append(elemFac.nextRecordPosition(str(nrp)))

        self.extraData('searchRetrieve', opts, result, rs, db)
        return result