def rank(msg): t = time.time() ret = {} msg = cStringIO.StringIO(msg) query_msg = decode_netstring_fd(msg) layer_msg = decode_netstring_fd(msg) erlay.report("Rank init took %dms" %\ ((time.time() - t) * 1000.0)) print >> sys.stderr, "QUERY", query_msg if query_msg['mods'] and query_msg['mods'].startswith("site:"): ok_site = hash(query_msg['mods'][5:]) print >> sys.stderr, "SHOW SITE", query_msg['mods'], ok_site else: ok_site = 0 t = time.time() hits_len, hits = ainodex.hits(map(int, query_msg['keys'].split()), 0) ret['num_hits'] = str(hits_len) hits_len, hits = filter_hits(hits_len, hits,\ site_check = True, prior_check=True, show_site=ok_site) erlay.report("Hits took %dms" %\ ((time.time() - t) * 1000.0)) print "HITS_LEN", hits_len t = time.time() layers = [None] * 10 for layer_str in layer_msg.itervalues(): ainodex.deserialize_layer(layer_str, layers) erlay.report("Deser took %dms" %\ ((time.time() - t) * 1000.0)) #kkeys = map(lambda x: ainopy.did2key(ainopy.sid2doc(x)[0]), ainodex.hit_contents(hits)) t = time.time() ret['ranked'] = ainodex.rank(hits, layers) print >> sys.stderr, "RANKED", array.array("I", ret["ranked"])[:20:2] #for key in array.array("I", ret["ranked"])[:20:2]: #if key not in okkeys: # print >> sys.stderr, "NOT IN OK", key print "LL", len(ret['ranked']) erlay.report("Ranking <%s><%s> took %dms" % (query_msg['keys'], query_msg['cues'], (time.time() - t) * 1000.0)) return encode_netstring_fd(ret)
def merge_scores(msg): msg = cStringIO.StringIO(msg) layers = [None] * 10 cueset_size = 0 while True: try: iblock_layers = decode_netstring_fd(msg) except EOFError: break cueset_size += int(iblock_layers['cueset_size']) del iblock_layers['cueset_size'] for layer_data in iblock_layers.itervalues(): offs, layer_id, layer =\ ainodex.deserialize_layer( layer_data, layers) #XXX: Since ixemes are allocated on different layers on each layer, # we must make sure that the ixeme counts match on every layer. This # could be easily avoided if ixemes were on the same layers on all # iblocks. This should be easy to fix. t = time.time() ainodex.sync_layers(layers) erlay.report("Syncing layers took %dms" %\ ((time.time() - t) * 1000.0)) print "CUE", type(cueset_size), cueset_size for layer in layers: if layer: ainodex.normalize_layer(layer, normtable, cueset_size) layers = [(str(i), ainodex.serialize_layer(layer)) for i, layer in enumerate(layers) if layer] return encode_netstring_fd(dict(layers))
if query_msg['mods']: if query_msg['mods'].startswith("site"): site_check = False elif query_msg['mods'].startswith("score"): show_score = True except Exception, x: erlay.report("Invalid or missing query string: %s" % query_msg['query']) traceback.print_exc() offs = 0 query_q = "" layers = [None] * 10 for layer_str in score_msg.itervalues(): ainodex.deserialize_layer(layer_str, layers) ranked = array.array("I", rank_msg['merged'])[::2] results = [] if show_score: results.append(show_best_ix(layers)) hits = len(ranked) + 1 site_check = True else: hits = 0 valid_hits = 0 seen_sites = {} seen_md5s = {} while hits < len(ranked): dockey = ranked[hits]