def unpackAnswerLinkQuery(self, answer, answerofs, answersize):
    """Decode the rows of a link query answer into a list of NblnkRow.

    The answer is read as a sequence of one-byte tags, each followed by its
    payload (all numbers are big-endian):

      '_'  row separator; flushes the row collected so far
      'D'  document number (">I")
      'W'  row weight (">d")
      'L'  link:    packed string followed by a weight (">d")
      'F'  feature: packed string followed by a weight (">d")
      'T'  title:   packed string followed by a weight (">d")
      'Z'  server number (">H"); kept for all following rows

    Args:
        answer: Byte buffer holding the answer message.
        answerofs: Offset in ``answer`` where decoding starts.
        answersize: Offset in ``answer`` where decoding stops.

    Returns:
        List of NblnkRow, one for every collected row whose document number
        is not 0.

    Raises:
        Exception: If a tag is found that is not part of the format above.
    """
    weight_fmt = ">d"
    weight_size = struct.calcsize(weight_fmt)

    def unpackWeightedItem(ofs):
        # Decode one "<packed string><weight>" entry starting at ofs and
        # return ([id, weight], offset behind the entry).
        (idstr, ofs) = strusMessage.unpackString(answer, ofs)
        # The weight is always big-endian; a native "d" here would misread
        # it on little-endian hosts.
        (weight, ) = struct.unpack_from(weight_fmt, answer, ofs)
        return [idstr, weight], ofs + weight_size

    result = []
    serverno = 0
    row_docno = 0
    row_weight = 0.0
    row_links = []
    row_titles = []
    row_features = []
    while answerofs < answersize:
        tag = answer[answerofs]
        if tag == ord('_'):
            # Start of the next row: emit the previous one if it is complete.
            if row_docno != 0:
                result.append(
                    NblnkRow(serverno, row_docno, row_weight, row_links,
                             row_features, row_titles))
            row_docno = 0
            row_weight = 0.0
            row_links = []
            row_titles = []
            row_features = []
            answerofs += 1
        elif tag == ord('D'):
            (row_docno, ) = struct.unpack_from(">I", answer, answerofs + 1)
            answerofs += struct.calcsize(">I") + 1
        elif tag == ord('W'):
            (row_weight, ) = struct.unpack_from(weight_fmt, answer,
                                                answerofs + 1)
            answerofs += weight_size + 1
        elif tag == ord('L'):
            (item, answerofs) = unpackWeightedItem(answerofs + 1)
            row_links.append(item)
        elif tag == ord('F'):
            (item, answerofs) = unpackWeightedItem(answerofs + 1)
            row_features.append(item)
        elif tag == ord('T'):
            (item, answerofs) = unpackWeightedItem(answerofs + 1)
            row_titles.append(item)
        elif tag == ord('Z'):
            (serverno, ) = struct.unpack_from(">H", answer, answerofs + 1)
            answerofs += struct.calcsize(">H") + 1
        else:
            raise Exception(
                "protocol error: unknown result column name '%c'" % (tag))
    # The last row has no trailing separator, so flush it here.
    if row_docno != 0:
        result.append(
            NblnkRow(serverno, row_docno, row_weight, row_links,
                     row_features, row_titles))
    return result
 def unpackAnswerTextQuery(self, answer, answerofs, answersize):
     """Decode the rows of a text query answer into a list of ResultRow.

     The buffer is scanned from ``answerofs`` up to ``answersize`` as a
     sequence of one-byte tags with their payloads: '_' separates rows,
     'D' carries the document number (">I"), 'W' the weight (">d"),
     'T', 'P', 'A' and 'B' carry packed strings for title, paragraph title,
     abstract and debug info, and 'Z' carries the server number (">H").

     Returns:
         List of ResultRow; a row is only emitted if it has a title.

     Raises:
         Exception: If an unknown tag is encountered.
     """
     rows = []
     serverno = 0
     docno, weight = 0, 0.0
     title = paratitle = abstract = debuginfo = None
     pos = answerofs
     while pos < answersize:
         tag = answer[pos]
         if tag == ord('_'):
             # Row boundary: store the finished row and reset the row state.
             if title is not None:
                 rows.append(
                     ResultRow(serverno, docno, weight, title, paratitle,
                               abstract, debuginfo))
             docno, weight = 0, 0.0
             title = paratitle = abstract = debuginfo = None
             pos += 1
         elif tag == ord('D'):
             docno = struct.unpack_from(">I", answer, pos + 1)[0]
             pos += 1 + struct.calcsize(">I")
         elif tag == ord('W'):
             weight = struct.unpack_from(">d", answer, pos + 1)[0]
             pos += 1 + struct.calcsize(">d")
         elif tag == ord('T'):
             title, pos = strusMessage.unpackString(answer, pos + 1)
         elif tag == ord('P'):
             paratitle, pos = strusMessage.unpackString(answer, pos + 1)
         elif tag == ord('A'):
             abstract, pos = strusMessage.unpackString(answer, pos + 1)
         elif tag == ord('B'):
             debuginfo, pos = strusMessage.unpackString(answer, pos + 1)
         elif tag == ord('Z'):
             serverno = struct.unpack_from(">H", answer, pos + 1)[0]
             pos += 1 + struct.calcsize(">H")
         else:
             raise Exception(
                 "protocol error: unknown result column name '%c'" % (tag))
     # The final row is not followed by a separator.
     if title is not None:
         rows.append(
             ResultRow(serverno, docno, weight, title, paratitle, abstract,
                       debuginfo))
     return rows
Beispiel #3
0
def processCommand(message):
    """Handle one request of the statistics server.

    Supported commands (first byte of ``message``):

      'P'  PUBLISH: a 16 bit server number followed by a statistics blob.
           The number of documents of the blob is added to the global
           ``collectionSize`` and the document frequency changes are
           accumulated in the global ``termDfMap``.
      'Q'  QUERY: a sequence of sub commands; 'T' followed by two packed
           strings (term type and value) asks for the document frequency
           of a term, 'N' asks for the collection size. Every answer is
           appended to the reply as a big-endian 64 bit integer.

    The result is delivered by raising ``tornado.gen.Return``: ``b"Y"``
    followed by the answers on success, ``b"E"`` followed by the error
    message on failure.
    """
    global collectionSize
    global termDfMap

    rt = bytearray(b"Y")
    try:
        if message[0] == ord('P'):
            # PUBLISH:
            messageofs = 1
            # The server number is not used, but decoding it still rejects
            # messages that are too short to hold the header.
            struct.unpack_from(">H", message, messageofs)
            messageofs += struct.calcsize(">H")
            msg = strusctx.unpackStatisticBlob(bytearray(message[messageofs:]))
            collectionSize += msg.nofdocs
            for dfchg in msg.dfchange:
                key = termDfMapKey(dfchg.type, dfchg.value)
                termDfMap[key] = termDfMap.get(key, 0) + int(dfchg.increment)
        elif message[0] == ord('Q'):
            # QUERY:
            messagesize = len(message)
            messageofs = 1
            while messageofs < messagesize:
                subcommand = message[messageofs]
                if subcommand == ord('T'):
                    # Fetch df of a term, message format [T][type][value]:
                    (termtype, messageofs) = strusMessage.unpackString(
                        message, messageofs + 1)
                    (termvalue, messageofs) = strusMessage.unpackString(
                        message, messageofs)
                    key = termDfMapKey(termtype, termvalue)
                    rt += struct.pack(">q", termDfMap.get(key, 0))
                elif subcommand == ord('N'):
                    # Fetch N (nof documents), message format [N]:
                    messageofs += 1
                    rt += struct.pack(">q", collectionSize)
                else:
                    raise Exception("unknown statistics server sub command")
        else:
            raise Exception("unknown statistics server command")
    except Exception as e:
        # An exception without arguments has an empty args tuple; fall back
        # to its string form instead of failing inside the handler.
        errmsg = e.args[0] if e.args else e
        raise tornado.gen.Return(bytearray("E%s" % errmsg, 'utf-8'))
    raise tornado.gen.Return(rt)
Beispiel #4
0
def processCommand(message):
    """Handle one request of the query evaluation server.

    Only the command 'Q' (first byte of ``message``) is supported. It is
    followed by a sequence of tagged parameters (numbers big-endian):

      'I'  first rank (">H")
      'N'  number of ranks (">H"), default 20
      'D'  document number to restrict the query to (">I"), 0 = none
      'M'  name of the query evaluation scheme (packed string), default "BM25"
      'S'  collection size (">q")
      'T'  query term: type and value (packed strings), then length, df
           and weight (">Hqd")
      'L'  link: type and value (packed strings), then weight (">d")
      'B'  enable debug info for this query

    The result is delivered by raising ``tornado.gen.Return``: ``b"Y"``
    followed by the server number and the packed result rows on success,
    ``b"E"`` followed by the error message on failure.
    """
    rt = bytearray(b"Y")

    def packWeightedItems(tag, items):
        # Append every (id, weight) pair as [tag][packed id][weight].
        for itemid, weight in items:
            rt.extend(tag)
            rt.extend(strusMessage.packString(itemid))
            rt.extend(struct.pack(">d", weight))

    trace_enabled = False
    try:
        messagesize = len(message)
        messageofs = 1
        if message[0] == ord('Q'):
            # QUERY:
            Term = collections.namedtuple(
                'Term', ['type', 'value', 'length', 'df', 'weight'])
            nofranks = 20
            restrictdn = 0
            collectionsize = 0
            firstrank = 0
            scheme = "BM25"
            terms = []
            links = []
            with_debuginfo = False
            # Build query to evaluate from the request:
            while messageofs < messagesize:
                tag = message[messageofs]
                if tag == ord('I'):
                    (firstrank, ) = struct.unpack_from(">H", message,
                                                       messageofs + 1)
                    messageofs += struct.calcsize(">H") + 1
                elif tag == ord('N'):
                    (nofranks, ) = struct.unpack_from(">H", message,
                                                      messageofs + 1)
                    messageofs += struct.calcsize(">H") + 1
                elif tag == ord('D'):
                    (restrictdn, ) = struct.unpack_from(
                        ">I", message, messageofs + 1)
                    messageofs += struct.calcsize(">I") + 1
                elif tag == ord('M'):
                    (scheme, messageofs) = strusMessage.unpackString(
                        message, messageofs + 1)
                elif tag == ord('S'):
                    (collectionsize, ) = struct.unpack_from(
                        ">q", message, messageofs + 1)
                    messageofs += struct.calcsize(">q") + 1
                elif tag == ord('T'):
                    (termtype, messageofs) = strusMessage.unpackString(
                        message, messageofs + 1)
                    (termvalue, messageofs) = strusMessage.unpackString(
                        message, messageofs)
                    (length, df,
                     weight) = struct.unpack_from(">Hqd", message, messageofs)
                    messageofs += struct.calcsize(">Hqd")
                    terms.append(Term(termtype, termvalue, length, df, weight))
                elif tag == ord('L'):
                    (termtype, messageofs) = strusMessage.unpackString(
                        message, messageofs + 1)
                    (termvalue, messageofs) = strusMessage.unpackString(
                        message, messageofs)
                    (weight, ) = struct.unpack_from(">d", message, messageofs)
                    messageofs += struct.calcsize(">d")
                    links.append(Term(termtype, termvalue, 1, 0, weight))
                elif tag == ord('B'):
                    messageofs += 1
                    with_debuginfo = True
                else:
                    # tornado.gen.Return is itself an Exception; raising it
                    # here would be caught by the handler below and its repr
                    # wrapped into the reply. Raise a plain error instead so
                    # the reply is "Eunknown parameter".
                    raise Exception("unknown parameter")

            if with_debuginfo or debugtrace:
                backend.enableDebugTrace()
                trace_enabled = True

            doTitleSelect = isStopWordsOnlyQuery(terms, collectionsize)
            # ... if we have a query containing only stopwords, we reduce our search space to
            # the documents containing some query terms in the title and the most referenced
            # documents in the collection.

            # Evaluate query:
            restrictdnlist = [restrictdn] if restrictdn != 0 else []
            results = backend.evaluateQuery(scheme, doTitleSelect, terms,
                                            links, collectionsize,
                                            firstrank, nofranks,
                                            restrictdnlist, debugtrace,
                                            with_debuginfo)

            # Build the result and pack it into the reply message for the client:
            rt.extend(b'Z')
            rt.extend(struct.pack(">H", serverno))

            is_link_scheme = scheme in ("NBLNK", "TILNK", "VCLNK", "STDLNK")
            for result in results:
                rt.extend(b'_')
                rt.extend(b'D')
                rt.extend(struct.pack(">I", result.docno))
                rt.extend(b'W')
                rt.extend(struct.pack(">d", result.weight))
                if is_link_scheme:
                    packWeightedItems(b'L', result.links)
                    if scheme == "STDLNK":
                        # Only STDLNK results carry titles and features.
                        packWeightedItems(b'T', result.titles)
                        packWeightedItems(b'F', result.features)
                else:
                    rt.extend(b'T')
                    rt.extend(strusMessage.packString(result.title))
                    if result.paratitle:
                        rt.extend(b'P')
                        rt.extend(strusMessage.packString(result.paratitle))
                    if result.debuginfo:
                        rt.extend(b'B')
                        rt.extend(strusMessage.packString(result.debuginfo))
                    rt.extend(b'A')
                    rt.extend(strusMessage.packString(result.abstract))
            if trace_enabled:
                backend.printDebugTrace()
                trace_enabled = False
                backend.disableDebugTrace()
        else:
            raise Exception("unknown protocol command '%c'" % (message[0]))
    except Exception as e:
        # Do not leave the debug trace switched on after a failed query.
        if trace_enabled:
            backend.disableDebugTrace()
        raise tornado.gen.Return(bytearray("E%s" % e, 'utf-8'))
    raise tornado.gen.Return(rt)
    def analyzeQuery(self, scheme, querystr, nofranks):
        """Analyze a query string with the help of the query analyze server.

        Sends ``querystr`` and ``nofranks`` to the server addressed by the
        global ``qryserver`` ("host:port") and decodes the reply into query
        terms and related terms. If anything fails, the error is recorded
        and the terms are produced by the local ``analyzer`` instead.

        This is a generator based coroutine; the result is delivered by
        raising ``tornado.gen.Return``.

        Args:
            scheme: Not used by this method.
            querystr: The query string to analyze.
            nofranks: Number of ranks passed to the server (">H").

        Returns:
            ``QueryStruct(terms, [], relatedterms, errors)``.
        """
        format_error = "query analyze server result format error"
        terms = []
        relatedterms = []
        errors = []
        conn = None
        try:
            query = bytearray(b"Q")
            query.extend(b'X')
            query.extend(strusMessage.packString(querystr))
            query.extend(b'N')
            query.extend(struct.pack(">H", nofranks))

            ri = qryserver.rindex(':')
            host, port = qryserver[:ri], int(qryserver[ri + 1:])
            conn = yield msgclient.connect(host, port)
            reply = yield msgclient.issueRequest(conn, query)
            if reply[0] == ord('E'):
                # Decode the message so that the error does not show the
                # repr of a bytes object.
                raise Exception(
                    "failed to query analyze server: %s" %
                    bytes(reply[1:]).decode('utf-8', 'replace'))
            elif reply[0] != ord('Y'):
                raise Exception("protocol error in query analyze server query")
            replyofs = 1
            replylen = len(reply)
            while replyofs < replylen:
                if reply[replyofs] == ord('T'):
                    # Term structure: [T] {[T]type | [V]value | [L]length} [_]
                    replyofs += 1
                    termtype = None
                    value = None
                    length = 1
                    while replyofs < replylen:
                        tag = reply[replyofs]
                        if tag == ord('T'):
                            (termtype, replyofs) = strusMessage.unpackString(
                                reply, replyofs + 1)
                        elif tag == ord('V'):
                            (value, replyofs) = strusMessage.unpackString(
                                reply, replyofs + 1)
                        elif tag == ord('L'):
                            (length, ) = struct.unpack_from(
                                ">I", reply, replyofs + 1)
                            replyofs += struct.calcsize(">I") + 1
                        elif tag == ord('_'):
                            replyofs += 1
                            break
                        else:
                            # Without this the loop would spin forever on an
                            # unknown tag, because the offset never advances.
                            raise Exception(format_error)
                    terms.append(QueryTerm(termtype, value, length, 1.0))
                elif reply[replyofs] == ord('R'):
                    # Related term: [R] {[V]value | [I]index | [W]weight} [_]
                    replyofs += 1
                    value = None
                    index = -1
                    weight = 0.0
                    while replyofs < replylen:
                        tag = reply[replyofs]
                        if tag == ord('V'):
                            (value, replyofs) = strusMessage.unpackString(
                                reply, replyofs + 1)
                        elif tag == ord('I'):
                            (index, ) = struct.unpack_from(
                                ">I", reply, replyofs + 1)
                            replyofs += struct.calcsize(">I") + 1
                        elif tag == ord('W'):
                            (weight, ) = struct.unpack_from(
                                ">d", reply, replyofs + 1)
                            replyofs += struct.calcsize(">d") + 1
                        elif tag == ord('_'):
                            replyofs += 1
                            break
                        else:
                            raise Exception(format_error)
                    if value is None:
                        raise Exception(format_error)
                    valuestr = value.replace('_', ' ')
                    # The query itself is not listed as related to itself.
                    if valuestr.lower() != querystr.lower():
                        encvalue = urllib.parse.quote(valuestr)
                        relatedterms.append(
                            RelatedTerm(valuestr, encvalue, index, weight))
                else:
                    break
            if replyofs != replylen:
                raise Exception(format_error)
            conn.close()
        except Exception as e:
            errors.append("query analyze server request failed: %s" % e)
            if conn:
                conn.close()
            # Drop terms decoded before the failure, otherwise the fallback
            # below would add the same query terms a second time.
            del terms[:]
            alt_terms = analyzer.analyzeTermExpression(["text", querystr])
            for term in alt_terms:
                terms.append(QueryTerm(term.type, term.value, term.length,
                                       1.0))
        raise tornado.gen.Return(QueryStruct(terms, [], relatedterms, errors))
def processCommand(message):
    """Handle one request of the query analyze server.

    Only the command 'Q' (first byte of ``message``) is supported. It is
    followed by tagged parameters: 'N' with the number of ranks (">H") and
    'X' with the query string (packed string).

    The query string is analyzed, vector features referenced by the terms
    (values of the form 'F<index>') are used to find related terms, and
    the reply is packed as

      [T] [T]type [V]value ([L]length) [_]      for every query term
      [R] [V]value [I]index [W]weight [_]       for every related term

    The result is delivered by raising ``tornado.gen.Return``: ``b"Y"``
    followed by the packed reply on success, ``b"E"`` followed by the error
    message on failure.
    """
    rt = bytearray(b"Y")
    try:
        if debugtrace:
            strusctx.enableDebugTrace("analyzer")

        # NOTE: tornado.gen.Return is itself an Exception. Raising it inside
        # this try block would be caught by the handler below and its repr
        # wrapped into the reply, so request errors are raised as plain
        # exceptions and formatted once by the handler (which also switches
        # the debug trace off again).
        messagesize = len(message)
        if messagesize < 1:
            raise Exception("empty request string")
        if message[0] != ord('Q'):
            raise Exception("unknown protocol command '%c'" % (message[0]))

        # Build query to evaluate from the request:
        nofranks = None
        querystr = None
        messageofs = 1
        while messageofs < messagesize:
            tag = message[messageofs]
            if tag == ord('N'):
                (nofranks, ) = struct.unpack_from(">H", message,
                                                  messageofs + 1)
                messageofs += struct.calcsize(">H") + 1
            elif tag == ord('X'):
                (querystr, messageofs) = strusMessage.unpackString(
                    message, messageofs + 1)
            else:
                raise Exception("unknown parameter")
        if querystr is None:
            raise Exception("missing query string parameter")

        # Analyze query:
        relatedlist = []
        terms = analyzer.analyzeTermExpression([["text", querystr],
                                                ["seltext", querystr]])

        # Extract vectors referenced:
        f_indices = [
            int(term.value[1:]) for term in terms if term.value[:1] == 'F'
        ]

        # Build real list of features for retrieval in the searchindex:
        pos2term = {}
        pos = 0
        for term in terms:
            if term.type != "selstem":
                if term.length and term.length > 1:
                    pos2term[pos] = AnalyzerTerm(term.type, term.value,
                                                 term.length)
                    pos += term.length
                elif term.type == "stem":
                    pos2term[pos] = AnalyzerTerm(term.type, term.value, 1)
                    pos += 1
        # Fill the positions not covered above with the "selstem" terms:
        pos = 0
        for term in terms:
            if term.type == "selstem":
                if pos not in pos2term:
                    pos2term[pos] = AnalyzerTerm("stem", term.value, 1)
                pos += 1
        # NOTE(review): the terms come out in dict iteration order, not
        # sorted by position - confirm that this is intended.
        terms = list(pos2term.values())

        # Calculate nearest neighbours of vectors extracted:
        if f_indices:
            if nofranks is None:
                raise Exception("missing number of ranks parameter")
            vec = vecstorage.featureVector(f_indices[0])
            if len(f_indices) > 1:
                # Several features: search with the sum of their vectors.
                for nextidx in f_indices[1:]:
                    vec = [
                        v + i for v, i in zip(
                            vec, vecstorage.featureVector(nextidx))
                    ]
                neighbour_ranklist = vecsearcher.findSimilar(vec, nofranks)
            else:
                # Single feature: search among the features that share a
                # concept with it.
                neighbour_set = set()
                for concept in vecstorage.featureConcepts("", f_indices[0]):
                    for neighbour in vecstorage.conceptFeatures("", concept):
                        neighbour_set.add(neighbour)
                neighbour_ranklist = vecsearcher.findSimilarFromSelection(
                    list(neighbour_set), vec, nofranks)

            for neighbour in neighbour_ranklist:
                fname = vecstorage.featureName(neighbour.featidx)
                relatedlist.append(
                    RelatedTerm(fname, neighbour.featidx, neighbour.weight))

        # Build the result and pack it into the reply message for the client:
        for term in terms:
            rt.extend(b'T')
            rt.extend(b'T')
            rt.extend(strusMessage.packString(term.type))
            rt.extend(b'V')
            rt.extend(strusMessage.packString(term.value))
            if term.length:
                rt.extend(b'L')
                rt.extend(struct.pack(">I", term.length))
            rt.extend(b'_')
        for related in relatedlist:
            rt.extend(b'R')
            rt.extend(b'V')
            rt.extend(strusMessage.packString(related.value))
            rt.extend(b'I')
            rt.extend(struct.pack(">I", related.index))
            rt.extend(b'W')
            rt.extend(struct.pack(">d", related.weight))
            rt.extend(b'_')
    except Exception as e:
        if debugtrace:
            strusctx.disableDebugTrace("analyzer")
        raise tornado.gen.Return(bytearray("E%s" % e, 'utf-8'))
    if debugtrace:
        dumpDebugTrace(strusctx.fetchDebugTrace(), "", 5)
        strusctx.disableDebugTrace("analyzer")
    raise tornado.gen.Return(rt)