Ejemplo n.º 1
0
Archivo: dex.py Proyecto: Zabrane/aino
def rank(msg):
        t = time.time()
        ret = {}
        msg = cStringIO.StringIO(msg)
        query_msg = decode_netstring_fd(msg)
        layer_msg = decode_netstring_fd(msg)
        erlay.report("Rank init took %dms" %\
                        ((time.time() - t) * 1000.0))

        print >> sys.stderr, "QUERY", query_msg

	if query_msg['mods'] and query_msg['mods'].startswith("site:"):
		ok_site = hash(query_msg['mods'][5:])
		print >> sys.stderr, "SHOW SITE", query_msg['mods'], ok_site
	else:
		ok_site = 0

        t = time.time()
	hits_len, hits = ainodex.hits(map(int, query_msg['keys'].split()), 0)
        ret['num_hits'] = str(hits_len)
        hits_len, hits = filter_hits(hits_len, hits,\
		site_check = True, prior_check=True, show_site=ok_site)
        erlay.report("Hits took %dms" %\
                        ((time.time() - t) * 1000.0))
       
       	print "HITS_LEN", hits_len
        t = time.time()
        layers = [None] * 10
        for layer_str in layer_msg.itervalues():
                ainodex.deserialize_layer(layer_str, layers)
        erlay.report("Deser took %dms" %\
                        ((time.time() - t) * 1000.0))

	#kkeys = map(lambda x: ainopy.did2key(ainopy.sid2doc(x)[0]), ainodex.hit_contents(hits))


        t = time.time()
        ret['ranked'] = ainodex.rank(hits, layers) 
	print >> sys.stderr, "RANKED", array.array("I", ret["ranked"])[:20:2]
	#for key in array.array("I", ret["ranked"])[:20:2]:
	#if key not in okkeys:
	#	print >> sys.stderr, "NOT IN OK", key

        print "LL", len(ret['ranked'])
        erlay.report("Ranking <%s><%s> took %dms" % 
                (query_msg['keys'], query_msg['cues'], 
                        (time.time() - t) * 1000.0))

        return encode_netstring_fd(ret)
Ejemplo n.º 2
0
def rank(msg):
    t = time.time()
    ret = {}
    msg = cStringIO.StringIO(msg)
    query_msg = decode_netstring_fd(msg)
    layer_msg = decode_netstring_fd(msg)
    erlay.report("Rank init took %dms" %\
                    ((time.time() - t) * 1000.0))

    print >> sys.stderr, "QUERY", query_msg

    if query_msg['mods'] and query_msg['mods'].startswith("site:"):
        ok_site = hash(query_msg['mods'][5:])
        print >> sys.stderr, "SHOW SITE", query_msg['mods'], ok_site
    else:
        ok_site = 0

    t = time.time()
    hits_len, hits = ainodex.hits(map(int, query_msg['keys'].split()), 0)
    ret['num_hits'] = str(hits_len)
    hits_len, hits = filter_hits(hits_len, hits,\
site_check = True, prior_check=True, show_site=ok_site)
    erlay.report("Hits took %dms" %\
                    ((time.time() - t) * 1000.0))

    print "HITS_LEN", hits_len
    t = time.time()
    layers = [None] * 10
    for layer_str in layer_msg.itervalues():
        ainodex.deserialize_layer(layer_str, layers)
    erlay.report("Deser took %dms" %\
                    ((time.time() - t) * 1000.0))

    #kkeys = map(lambda x: ainopy.did2key(ainopy.sid2doc(x)[0]), ainodex.hit_contents(hits))

    t = time.time()
    ret['ranked'] = ainodex.rank(hits, layers)
    print >> sys.stderr, "RANKED", array.array("I", ret["ranked"])[:20:2]
    #for key in array.array("I", ret["ranked"])[:20:2]:
    #if key not in okkeys:
    #	print >> sys.stderr, "NOT IN OK", key

    print "LL", len(ret['ranked'])
    erlay.report("Ranking <%s><%s> took %dms" %
                 (query_msg['keys'], query_msg['cues'],
                  (time.time() - t) * 1000.0))

    return encode_netstring_fd(ret)
Ejemplo n.º 3
0
ainodex.open()

if len(sys.argv) < 2:
	print "Usage: simple [key] [cue]"
	sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
	# Compute token scores
	ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]

print zip(doc_keys, doc_scores)
Ejemplo n.º 4
0
ainodex.open()

if len(sys.argv) < 2:
    print "Usage: simple [key] [cue]"
    sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
    # Compute token scores
    ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]

print zip(doc_keys, doc_scores)