def unpackAnswerLinkQuery(self, answer, answerofs, answersize):
    """Decode the tagged binary answer of a link query into NblnkRow objects.

    The answer is a sequence of one-byte column tags, each followed by its
    payload. All numeric fields are big-endian:

        '_'  row separator (no payload)
        'D'  document number, ">I"
        'W'  row weight, ">d"
        'L'  link:    packed string followed by a ">d" weight
        'F'  feature: packed string followed by a ">d" weight
        'T'  title:   packed string followed by a ">d" weight
        'Z'  server number, ">H"

    Args:
        answer: bytes-like buffer holding the answer.
        answerofs: offset of the first tag to read.
        answersize: offset at which parsing stops.

    Returns:
        A list of NblnkRow, one per row whose document number is non-zero.

    Raises:
        Exception: if an unknown column tag is encountered.
    """
    result = []
    serverno = 0
    row_docno = 0
    row_weight = 0.0
    row_links = []
    row_titles = []
    row_features = []
    weight_size = struct.calcsize(">d")

    while answerofs < answersize:
        tag = answer[answerofs]
        if tag == ord('_'):
            if row_docno != 0:
                result.append(
                    NblnkRow(serverno, row_docno, row_weight,
                             row_links, row_features, row_titles))
            # Start fresh lists: the row appended above keeps the old ones.
            row_docno = 0
            row_weight = 0.0
            row_links = []
            row_titles = []
            row_features = []
            answerofs += 1
        elif tag == ord('D'):
            (row_docno, ) = struct.unpack_from(">I", answer, answerofs + 1)
            answerofs += struct.calcsize(">I") + 1
        elif tag == ord('W'):
            (row_weight, ) = struct.unpack_from(">d", answer, answerofs + 1)
            answerofs += weight_size + 1
        elif tag in (ord('L'), ord('F'), ord('T')):
            (idstr, answerofs) = strusMessage.unpackString(
                answer, answerofs + 1)
            # Always big-endian: the title branch formerly used the native
            # byte order ("d"), which decodes a wrong weight on
            # little-endian hosts.
            (weight, ) = struct.unpack_from(">d", answer, answerofs)
            answerofs += weight_size
            if tag == ord('L'):
                row_links.append([idstr, weight])
            elif tag == ord('F'):
                row_features.append([idstr, weight])
            else:
                row_titles.append([idstr, weight])
        elif tag == ord('Z'):
            (serverno, ) = struct.unpack_from(">H", answer, answerofs + 1)
            answerofs += struct.calcsize(">H") + 1
        else:
            raise Exception(
                "protocol error: unknown result column name '%c'" % (tag))

    # The last row is not followed by a separator.
    if row_docno != 0:
        result.append(
            NblnkRow(serverno, row_docno, row_weight,
                     row_links, row_features, row_titles))
    return result
def unpackAnswerTextQuery(self, answer, answerofs, answersize):
    """Decode the tagged binary answer of a text query into ResultRow objects.

    Each column starts with a one-byte tag:

        '_'  row separator
        'D'  document number (">I")
        'W'  weight (">d")
        'T'  title (packed string)
        'P'  paragraph title (packed string)
        'A'  abstract (packed string)
        'B'  debug info (packed string)
        'Z'  server number (">H")

    A row is emitted only when a title has been read for it. Parsing runs
    from ``answerofs`` up to ``answersize``; an unknown tag raises an
    Exception.
    """
    rows = []
    serverno = 0
    docno = 0
    weight = 0.0
    title = None
    paratitle = None
    abstract = None
    debuginfo = None

    pos = answerofs
    while pos < answersize:
        tag = answer[pos]
        if tag == ord('_'):
            if title is not None:
                rows.append(
                    ResultRow(serverno, docno, weight, title,
                              paratitle, abstract, debuginfo))
            docno = 0
            weight = 0.0
            title = None
            paratitle = None
            abstract = None
            debuginfo = None
            pos += 1
        elif tag == ord('D'):
            (docno, ) = struct.unpack_from(">I", answer, pos + 1)
            pos += struct.calcsize(">I") + 1
        elif tag == ord('W'):
            (weight, ) = struct.unpack_from(">d", answer, pos + 1)
            pos += struct.calcsize(">d") + 1
        elif tag == ord('T'):
            (title, pos) = strusMessage.unpackString(answer, pos + 1)
        elif tag == ord('P'):
            (paratitle, pos) = strusMessage.unpackString(answer, pos + 1)
        elif tag == ord('A'):
            (abstract, pos) = strusMessage.unpackString(answer, pos + 1)
        elif tag == ord('B'):
            (debuginfo, pos) = strusMessage.unpackString(answer, pos + 1)
        elif tag == ord('Z'):
            (serverno, ) = struct.unpack_from(">H", answer, pos + 1)
            pos += struct.calcsize(">H") + 1
        else:
            raise Exception(
                "protocol error: unknown result column name '%c'" % (tag))

    # Flush the trailing row, which has no separator after it.
    if title is not None:
        rows.append(
            ResultRow(serverno, docno, weight, title,
                      paratitle, abstract, debuginfo))
    return rows
def processCommand(message):
    """Handle one statistics server request and return the packed reply.

    Commands (first byte of ``message``):

        'P'  PUBLISH: a ">H" server number followed by a statistics blob.
             The blob's document count is added to the global
             ``collectionSize`` and every df change is accumulated in the
             global ``termDfMap``.
        'Q'  QUERY: a sequence of sub commands. 'T' + type string + value
             string appends the term's df (">q", 0 if unknown) to the reply;
             'N' appends the collection size (">q").

    The result is delivered by raising ``tornado.gen.Return``: a bytearray
    starting with 'Y' on success, or 'E' followed by the error text.
    """
    global collectionSize
    global termDfMap

    rt = bytearray(b"Y")
    try:
        if message[0] == ord('P'):
            # PUBLISH:
            messageofs = 1
            # The server number is not used here; unpacking it still checks
            # that the header is complete.
            struct.unpack_from(">H", message, messageofs)
            messageofs += struct.calcsize(">H")
            msg = strusctx.unpackStatisticBlob(
                bytearray(message[messageofs:]))
            collectionSize += msg.nofdocs
            for dfchg in msg.dfchange:
                key = termDfMapKey(dfchg.type, dfchg.value)
                termDfMap[key] = termDfMap.get(key, 0) + int(dfchg.increment)
        elif message[0] == ord('Q'):
            # QUERY:
            messagesize = len(message)
            messageofs = 1
            while messageofs < messagesize:
                if message[messageofs] == ord('T'):
                    # Fetch df of a term, message format [T][type][value]:
                    (termtype, messageofs) = strusMessage.unpackString(
                        message, messageofs + 1)
                    (termvalue, messageofs) = strusMessage.unpackString(
                        message, messageofs)
                    key = termDfMapKey(termtype, termvalue)
                    rt += struct.pack(">q", termDfMap.get(key, 0))
                elif message[messageofs] == ord('N'):
                    # Fetch N (nof documents), message format [N]:
                    messageofs += 1
                    rt += struct.pack(">q", collectionSize)
                else:
                    raise Exception("unknown statistics server sub command")
        else:
            raise Exception("unknown statistics server command")
    except Exception as e:
        # e.args may be empty; indexing it blindly would raise IndexError
        # from inside the handler and lose the error reply.
        errmsg = e.args[0] if e.args else e
        raise tornado.gen.Return(bytearray("E%s" % errmsg, 'utf-8'))
    raise tornado.gen.Return(rt)
def processCommand(message):
    """Handle one storage server request and return the packed reply.

    Only the 'Q' (query) command is supported. The request parameters
    follow the command byte, each introduced by a one-byte tag:

        'I'  first rank (">H")
        'N'  number of ranks (">H", default 20)
        'D'  restrict evaluation to one document number (">I")
        'M'  query evaluation scheme name (string, default "BM25")
        'S'  collection size (">q")
        'T'  term: type string, value string, then ">Hqd" (length, df, weight)
        'L'  link: type string, value string, then ">d" weight
        'B'  request debug info

    The reply is delivered by raising ``tornado.gen.Return``: a bytearray
    starting with 'Y', then 'Z' + server number and one '_'-introduced
    record per result, or 'E' followed by the error text on failure.
    """
    rt = bytearray(b"Y")
    trace_enabled = False
    try:
        messagesize = len(message)
        messageofs = 1
        if message[0] != ord('Q'):
            raise Exception("unknown protocol command '%c'" % (message[0]))

        # QUERY:
        Term = collections.namedtuple(
            'Term', ['type', 'value', 'length', 'df', 'weight'])
        nofranks = 20
        restrictdn = 0
        collectionsize = 0
        firstrank = 0
        scheme = "BM25"
        terms = []
        links = []
        with_debuginfo = False

        # Build query to evaluate from the request:
        while messageofs < messagesize:
            tag = message[messageofs]
            if tag == ord('I'):
                (firstrank, ) = struct.unpack_from(
                    ">H", message, messageofs + 1)
                messageofs += struct.calcsize(">H") + 1
            elif tag == ord('N'):
                (nofranks, ) = struct.unpack_from(
                    ">H", message, messageofs + 1)
                messageofs += struct.calcsize(">H") + 1
            elif tag == ord('D'):
                (restrictdn, ) = struct.unpack_from(
                    ">I", message, messageofs + 1)
                messageofs += struct.calcsize(">I") + 1
            elif tag == ord('M'):
                (scheme, messageofs) = strusMessage.unpackString(
                    message, messageofs + 1)
            elif tag == ord('S'):
                (collectionsize, ) = struct.unpack_from(
                    ">q", message, messageofs + 1)
                messageofs += struct.calcsize(">q") + 1
            elif tag == ord('T'):
                (termtype, messageofs) = strusMessage.unpackString(
                    message, messageofs + 1)
                (termvalue, messageofs) = strusMessage.unpackString(
                    message, messageofs)
                (length, df, weight) = struct.unpack_from(
                    ">Hqd", message, messageofs)
                messageofs += struct.calcsize(">Hqd")
                terms.append(Term(termtype, termvalue, length, df, weight))
            elif tag == ord('L'):
                (termtype, messageofs) = strusMessage.unpackString(
                    message, messageofs + 1)
                (termvalue, messageofs) = strusMessage.unpackString(
                    message, messageofs)
                (weight, ) = struct.unpack_from(">d", message, messageofs)
                messageofs += struct.calcsize(">d")
                links.append(Term(termtype, termvalue, 1, 0, weight))
            elif tag == ord('B'):
                messageofs += 1
                with_debuginfo = True
            else:
                # Must be a plain Exception: tornado.gen.Return is itself an
                # Exception subclass, so raising it inside this try block
                # gets caught below and its bytes repr ends up in the reply.
                raise Exception("unknown parameter")

        if with_debuginfo or debugtrace:
            backend.enableDebugTrace()
            trace_enabled = True

        doTitleSelect = isStopWordsOnlyQuery(terms, collectionsize)
        # ... if we have a query containing only stopwords, we reduce our
        # search space to the documents containing some query terms in the
        # title and the most referenced documents in the collection.

        # Evaluate query:
        restrictset = [] if restrictdn == 0 else [restrictdn]
        results = backend.evaluateQuery(
            scheme, doTitleSelect, terms, links, collectionsize,
            firstrank, nofranks, restrictset, debugtrace, with_debuginfo)

        # Build the result and pack it into the reply message for the client:
        rt.extend(b'Z')
        rt.extend(struct.pack(">H", serverno))

        is_stdlnk = scheme == "STDLNK"
        if is_stdlnk or scheme in ("NBLNK", "TILNK", "VCLNK"):
            for result in results:
                rt.extend(b'_')
                rt.extend(b'D')
                rt.extend(struct.pack(">I", result.docno))
                rt.extend(b'W')
                rt.extend(struct.pack(">d", result.weight))
                for linkid, weight in result.links:
                    rt.extend(b'L')
                    rt.extend(strusMessage.packString(linkid))
                    rt.extend(struct.pack(">d", weight))
                if is_stdlnk:
                    # Only STDLNK results carry titles and features.
                    for linkid, weight in result.titles:
                        rt.extend(b'T')
                        rt.extend(strusMessage.packString(linkid))
                        rt.extend(struct.pack(">d", weight))
                    for featid, weight in result.features:
                        rt.extend(b'F')
                        rt.extend(strusMessage.packString(featid))
                        rt.extend(struct.pack(">d", weight))
        else:
            for result in results:
                rt.extend(b'_')
                rt.extend(b'D')
                rt.extend(struct.pack(">I", result.docno))
                rt.extend(b'W')
                rt.extend(struct.pack(">d", result.weight))
                rt.extend(b'T')
                rt.extend(strusMessage.packString(result.title))
                if result.paratitle:
                    rt.extend(b'P')
                    rt.extend(strusMessage.packString(result.paratitle))
                if result.debuginfo:
                    rt.extend(b'B')
                    rt.extend(strusMessage.packString(result.debuginfo))
                rt.extend(b'A')
                rt.extend(strusMessage.packString(result.abstract))

        if trace_enabled:
            backend.printDebugTrace()
            trace_enabled = False
            backend.disableDebugTrace()
    except Exception as e:
        # Do not leave the debug trace switched on after a failed request.
        if trace_enabled:
            backend.disableDebugTrace()
        raise tornado.gen.Return(bytearray("E%s" % e, 'utf-8'))
    raise tornado.gen.Return(rt)
def analyzeQuery(self, scheme, querystr, nofranks):
    """Analyze a query string with the query analyze server.

    Sends a 'Q' request ('X' + query string, 'N' + number of ranks) to the
    server addressed by the global ``qryserver`` ("host:port") and decodes
    the reply into query terms ('T' records) and related terms ('R'
    records). Every record ends with '_'.

    If anything fails, the error is recorded and the local ``analyzer`` is
    used to produce the terms instead.

    Args:
        scheme: not used by this method.
        querystr: the query string to analyze.
        nofranks: number of ranks requested from the server.

    The result is delivered by raising ``tornado.gen.Return`` with a
    ``QueryStruct(terms, [], relatedterms, errors)``.
    """
    terms = []
    relatedterms = []
    errors = []
    conn = None
    format_error = "query analyze server result format error"
    try:
        query = bytearray(b"Q")
        query.extend(b'X')
        query.extend(strusMessage.packString(querystr))
        query.extend(b'N')
        query.extend(struct.pack(">H", nofranks))

        ri = qryserver.rindex(':')
        host, port = qryserver[:ri], int(qryserver[ri + 1:])
        conn = yield msgclient.connect(host, port)
        reply = yield msgclient.issueRequest(conn, query)
        if reply[0] == ord('E'):
            # Decode the server's text so the message does not show a
            # bytes repr.
            raise Exception(
                "failed to query analyze server: %s"
                % bytes(reply[1:]).decode('utf-8', 'replace'))
        elif reply[0] != ord('Y'):
            raise Exception("protocol error in query analyze server query")

        replyofs = 1
        replylen = len(reply)
        while replyofs < replylen:
            if reply[replyofs] == ord('T'):
                # Term record: [T] ([T]type | [V]value | [L]length)* [_]
                replyofs += 1
                termtype = None
                value = None
                length = 1
                while replyofs < replylen:
                    tag = reply[replyofs]
                    if tag == ord('T'):
                        (termtype, replyofs) = strusMessage.unpackString(
                            reply, replyofs + 1)
                    elif tag == ord('V'):
                        (value, replyofs) = strusMessage.unpackString(
                            reply, replyofs + 1)
                    elif tag == ord('L'):
                        (length, ) = struct.unpack_from(
                            ">I", reply, replyofs + 1)
                        replyofs += struct.calcsize(">I") + 1
                    elif tag == ord('_'):
                        replyofs += 1
                        break
                    else:
                        # Without this branch an unexpected byte would never
                        # advance replyofs and the loop would spin forever.
                        raise Exception(format_error)
                terms.append(QueryTerm(termtype, value, length, 1.0))
            elif reply[replyofs] == ord('R'):
                # Related term record: [R] ([V]value | [I]index | [W]weight)* [_]
                replyofs += 1
                value = None
                index = -1
                weight = 0.0
                while replyofs < replylen:
                    tag = reply[replyofs]
                    if tag == ord('V'):
                        (value, replyofs) = strusMessage.unpackString(
                            reply, replyofs + 1)
                    elif tag == ord('I'):
                        (index, ) = struct.unpack_from(
                            ">I", reply, replyofs + 1)
                        replyofs += struct.calcsize(">I") + 1
                    elif tag == ord('W'):
                        (weight, ) = struct.unpack_from(
                            ">d", reply, replyofs + 1)
                        replyofs += struct.calcsize(">d") + 1
                    elif tag == ord('_'):
                        replyofs += 1
                        break
                    else:
                        raise Exception(format_error)
                if value is None:
                    raise Exception(format_error)
                valuestr = value.replace('_', ' ')
                # The query itself is not listed as a related term.
                if valuestr.lower() != querystr.lower():
                    encvalue = urllib.parse.quote(valuestr)
                    relatedterms.append(
                        RelatedTerm(valuestr, encvalue, index, weight))
            else:
                break
        if replyofs != replylen:
            raise Exception(format_error)
        conn.close()
    except Exception as e:
        errors.append("query analyze server request failed: %s" % e)
        if conn:
            conn.close()
        # Fall back to the local analyzer.
        # NOTE(review): terms decoded before the failure are kept, so the
        # fallback terms are appended after them - confirm this is intended.
        alt_terms = analyzer.analyzeTermExpression(["text", querystr])
        for term in alt_terms:
            terms.append(QueryTerm(term.type, term.value, term.length, 1.0))
    raise tornado.gen.Return(QueryStruct(terms, [], relatedterms, errors))
def processCommand(message):
    """Handle one query analyze server request and return the packed reply.

    Only the 'Q' command is supported. Its parameters are 'N' + number of
    ranks (">H") and 'X' + query string. The query is analyzed, the terms
    are reduced to the features used for retrieval, and for terms whose
    value starts with 'F' the most similar vectors are looked up as related
    terms.

    The reply is delivered by raising ``tornado.gen.Return``: a bytearray
    starting with 'Y', followed by one 'T' record per term
    ([T]type [V]value [L]length, ended by '_') and one 'R' record per
    related term ([V]value [I]index [W]weight, ended by '_'); or 'E'
    followed by the error text on failure.
    """
    rt = bytearray(b"Y")
    try:
        if debugtrace:
            strusctx.enableDebugTrace("analyzer")
        messagesize = len(message)
        # Errors are raised as plain exceptions: tornado.gen.Return is an
        # Exception subclass, so raising it inside this try block gets caught
        # below and its bytes repr ends up in the reply.
        if messagesize < 1:
            raise Exception("empty request string")
        if message[0] != ord('Q'):
            raise Exception("unknown protocol command '%c'" % (message[0]))

        # Build query to evaluate from the request:
        messageofs = 1
        nofranks = None
        querystr = None
        while messageofs < messagesize:
            if message[messageofs] == ord('N'):
                (nofranks, ) = struct.unpack_from(
                    ">H", message, messageofs + 1)
                messageofs += struct.calcsize(">H") + 1
            elif message[messageofs] == ord('X'):
                (querystr, messageofs) = strusMessage.unpackString(
                    message, messageofs + 1)
            else:
                raise Exception("unknown parameter")
        if querystr is None:
            raise Exception("missing query string parameter")

        # Analyze query:
        relatedlist = []
        terms = analyzer.analyzeTermExpression(
            [["text", querystr], ["seltext", querystr]])

        # Extract vectors referenced:
        f_indices = [
            int(term.value[1:])
            for term in terms
            if term.value and term.value[0] == 'F'
        ]

        # Build real list of features for retrieval in the searchindex:
        # multi-position terms and stems claim their positions first, then
        # "selstem" terms fill the positions that are still free.
        pos2term = {}
        pos = 0
        for term in terms:
            if term.type != "selstem":
                if term.length and term.length > 1:
                    pos2term[pos] = AnalyzerTerm(
                        term.type, term.value, term.length)
                    pos += term.length
                elif term.type == "stem":
                    pos2term[pos] = AnalyzerTerm(term.type, term.value, 1)
                    pos += 1
        pos = 0
        for term in terms:
            if term.type == "selstem":
                if pos not in pos2term:
                    pos2term[pos] = AnalyzerTerm("stem", term.value, 1)
                pos += 1
        # NOTE(review): terms come out in insertion order, not sorted by
        # position - confirm that is what the client expects.
        terms = list(pos2term.values())

        # Calculate nearest neighbours of vectors extracted:
        if f_indices:
            if nofranks is None:
                raise Exception("missing number of ranks parameter")
            vec = vecstorage.featureVector(f_indices[0])
            if len(f_indices) > 1:
                # Several features: search around the sum of their vectors.
                for nextidx in f_indices[1:]:
                    vec = [
                        v + i for v, i in zip(
                            vec, vecstorage.featureVector(nextidx))
                    ]
                neighbour_ranklist = vecsearcher.findSimilar(vec, nofranks)
            else:
                # Single feature: search only among the features sharing a
                # concept with it.
                neighbour_set = {
                    neighbour
                    for concept in vecstorage.featureConcepts(
                        "", f_indices[0])
                    for neighbour in vecstorage.conceptFeatures("", concept)
                }
                neighbour_ranklist = vecsearcher.findSimilarFromSelection(
                    list(neighbour_set), vec, nofranks)

            for neighbour in neighbour_ranklist:
                fname = vecstorage.featureName(neighbour.featidx)
                relatedlist.append(
                    RelatedTerm(fname, neighbour.featidx, neighbour.weight))

        # Build the result and pack it into the reply message for the client:
        for term in terms:
            rt.extend(b'T')
            rt.extend(b'T')
            rt.extend(strusMessage.packString(term.type))
            rt.extend(b'V')
            rt.extend(strusMessage.packString(term.value))
            if term.length:
                rt.extend(b'L')
                rt.extend(struct.pack(">I", term.length))
            rt.extend(b'_')
        for related in relatedlist:
            rt.extend(b'R')
            rt.extend(b'V')
            rt.extend(strusMessage.packString(related.value))
            rt.extend(b'I')
            rt.extend(struct.pack(">I", related.index))
            rt.extend(b'W')
            rt.extend(struct.pack(">d", related.weight))
            rt.extend(b'_')
    except Exception as e:
        if debugtrace:
            strusctx.disableDebugTrace("analyzer")
        raise tornado.gen.Return(bytearray("E%s" % e, 'utf-8'))
    if debugtrace:
        dumpDebugTrace(strusctx.fetchDebugTrace(), "", 5)
        strusctx.disableDebugTrace("analyzer")
    raise tornado.gen.Return(rt)