예제 #1
0
파일: dex.py 프로젝트: Zabrane/aino
def score(msg):
        ret = {}
        msg = decode_netstring_fd(cStringIO.StringIO(msg))
        #cueset_size, cues = ainodex.expand_cueset(
        #        map(int, msg['cues'].split()))

	cueset_size, cues = ainodex.hits(map(int, msg['cues'].split()), 0)
        cueset_size, cues = filter_hits(cueset_size, cues,\
		prior_check = True, site_check = True)
        ret['cueset_size'] = str(cueset_size)

        ok_layers = [i for i, maxf in enumerate(LAYERS)
                if min(maxf, cueset_size) / float(max(maxf, cueset_size)) 
                                > MIN_SCORE]

        if len(LAYERS) - 1 in ok_layers:
                ok_layers.append(len(LAYERS))
        print "OK", ok_layers, "CUES", cueset_size

        t = time.time()
        
        for i in ok_layers:
                layer = ainodex.new_layer(i, cues)
                ret[str(i)] = ainodex.serialize_layer(layer)
        
        erlay.report("Scoring <%s> took %dms" % 
                (msg['cues'], (time.time() - t) * 1000.0))

        return encode_netstring_fd(ret)
예제 #2
0
def score(msg):
    ret = {}
    msg = decode_netstring_fd(cStringIO.StringIO(msg))
    #cueset_size, cues = ainodex.expand_cueset(
    #        map(int, msg['cues'].split()))

    cueset_size, cues = ainodex.hits(map(int, msg['cues'].split()), 0)
    cueset_size, cues = filter_hits(cueset_size, cues,\
prior_check = True, site_check = True)
    ret['cueset_size'] = str(cueset_size)

    ok_layers = [
        i for i, maxf in enumerate(LAYERS)
        if min(maxf, cueset_size) / float(max(maxf, cueset_size)) > MIN_SCORE
    ]

    if len(LAYERS) - 1 in ok_layers:
        ok_layers.append(len(LAYERS))
    print "OK", ok_layers, "CUES", cueset_size

    t = time.time()

    for i in ok_layers:
        layer = ainodex.new_layer(i, cues)
        ret[str(i)] = ainodex.serialize_layer(layer)

    erlay.report("Scoring <%s> took %dms" % (msg['cues'],
                                             (time.time() - t) * 1000.0))

    return encode_netstring_fd(ret)
예제 #3
0
파일: dex.py 프로젝트: Zabrane/aino
def rank(msg):
        t = time.time()
        ret = {}
        msg = cStringIO.StringIO(msg)
        query_msg = decode_netstring_fd(msg)
        layer_msg = decode_netstring_fd(msg)
        erlay.report("Rank init took %dms" %\
                        ((time.time() - t) * 1000.0))

        print >> sys.stderr, "QUERY", query_msg

	if query_msg['mods'] and query_msg['mods'].startswith("site:"):
		ok_site = hash(query_msg['mods'][5:])
		print >> sys.stderr, "SHOW SITE", query_msg['mods'], ok_site
	else:
		ok_site = 0

        t = time.time()
	hits_len, hits = ainodex.hits(map(int, query_msg['keys'].split()), 0)
        ret['num_hits'] = str(hits_len)
        hits_len, hits = filter_hits(hits_len, hits,\
		site_check = True, prior_check=True, show_site=ok_site)
        erlay.report("Hits took %dms" %\
                        ((time.time() - t) * 1000.0))
       
       	print "HITS_LEN", hits_len
        t = time.time()
        layers = [None] * 10
        for layer_str in layer_msg.itervalues():
                ainodex.deserialize_layer(layer_str, layers)
        erlay.report("Deser took %dms" %\
                        ((time.time() - t) * 1000.0))

	#kkeys = map(lambda x: ainopy.did2key(ainopy.sid2doc(x)[0]), ainodex.hit_contents(hits))


        t = time.time()
        ret['ranked'] = ainodex.rank(hits, layers) 
	print >> sys.stderr, "RANKED", array.array("I", ret["ranked"])[:20:2]
	#for key in array.array("I", ret["ranked"])[:20:2]:
	#if key not in okkeys:
	#	print >> sys.stderr, "NOT IN OK", key

        print "LL", len(ret['ranked'])
        erlay.report("Ranking <%s><%s> took %dms" % 
                (query_msg['keys'], query_msg['cues'], 
                        (time.time() - t) * 1000.0))

        return encode_netstring_fd(ret)
예제 #4
0
def rank(msg):
    t = time.time()
    ret = {}
    msg = cStringIO.StringIO(msg)
    query_msg = decode_netstring_fd(msg)
    layer_msg = decode_netstring_fd(msg)
    erlay.report("Rank init took %dms" %\
                    ((time.time() - t) * 1000.0))

    print >> sys.stderr, "QUERY", query_msg

    if query_msg['mods'] and query_msg['mods'].startswith("site:"):
        ok_site = hash(query_msg['mods'][5:])
        print >> sys.stderr, "SHOW SITE", query_msg['mods'], ok_site
    else:
        ok_site = 0

    t = time.time()
    hits_len, hits = ainodex.hits(map(int, query_msg['keys'].split()), 0)
    ret['num_hits'] = str(hits_len)
    hits_len, hits = filter_hits(hits_len, hits,\
site_check = True, prior_check=True, show_site=ok_site)
    erlay.report("Hits took %dms" %\
                    ((time.time() - t) * 1000.0))

    print "HITS_LEN", hits_len
    t = time.time()
    layers = [None] * 10
    for layer_str in layer_msg.itervalues():
        ainodex.deserialize_layer(layer_str, layers)
    erlay.report("Deser took %dms" %\
                    ((time.time() - t) * 1000.0))

    #kkeys = map(lambda x: ainopy.did2key(ainopy.sid2doc(x)[0]), ainodex.hit_contents(hits))

    t = time.time()
    ret['ranked'] = ainodex.rank(hits, layers)
    print >> sys.stderr, "RANKED", array.array("I", ret["ranked"])[:20:2]
    #for key in array.array("I", ret["ranked"])[:20:2]:
    #if key not in okkeys:
    #	print >> sys.stderr, "NOT IN OK", key

    print "LL", len(ret['ranked'])
    erlay.report("Ranking <%s><%s> took %dms" %
                 (query_msg['keys'], query_msg['cues'],
                  (time.time() - t) * 1000.0))

    return encode_netstring_fd(ret)
예제 #5
0
파일: simple.py 프로젝트: Zabrane/aino
import sys, array
import ainodex

ainodex.open()

if len(sys.argv) < 2:
	print "Usage: simple [key] [cue]"
	sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
	# Compute token scores
	ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]
예제 #6
0
import sys, array
import ainodex

ainodex.open()

if len(sys.argv) < 2:
    print "Usage: simple [key] [cue]"
    sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
    # Compute token scores
    ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]