예제 #1
0
def score(msg):
    ret = {}
    msg = decode_netstring_fd(cStringIO.StringIO(msg))
    #cueset_size, cues = ainodex.expand_cueset(
    #        map(int, msg['cues'].split()))

    cueset_size, cues = ainodex.hits(map(int, msg['cues'].split()), 0)
    cueset_size, cues = filter_hits(cueset_size, cues,\
prior_check = True, site_check = True)
    ret['cueset_size'] = str(cueset_size)

    ok_layers = [
        i for i, maxf in enumerate(LAYERS)
        if min(maxf, cueset_size) / float(max(maxf, cueset_size)) > MIN_SCORE
    ]

    if len(LAYERS) - 1 in ok_layers:
        ok_layers.append(len(LAYERS))
    print "OK", ok_layers, "CUES", cueset_size

    t = time.time()

    for i in ok_layers:
        layer = ainodex.new_layer(i, cues)
        ret[str(i)] = ainodex.serialize_layer(layer)

    erlay.report("Scoring <%s> took %dms" % (msg['cues'],
                                             (time.time() - t) * 1000.0))

    return encode_netstring_fd(ret)
예제 #2
0
파일: dex.py 프로젝트: Zabrane/aino
def score(msg):
        ret = {}
        msg = decode_netstring_fd(cStringIO.StringIO(msg))
        #cueset_size, cues = ainodex.expand_cueset(
        #        map(int, msg['cues'].split()))

	cueset_size, cues = ainodex.hits(map(int, msg['cues'].split()), 0)
        cueset_size, cues = filter_hits(cueset_size, cues,\
		prior_check = True, site_check = True)
        ret['cueset_size'] = str(cueset_size)

        ok_layers = [i for i, maxf in enumerate(LAYERS)
                if min(maxf, cueset_size) / float(max(maxf, cueset_size)) 
                                > MIN_SCORE]

        if len(LAYERS) - 1 in ok_layers:
                ok_layers.append(len(LAYERS))
        print "OK", ok_layers, "CUES", cueset_size

        t = time.time()
        
        for i in ok_layers:
                layer = ainodex.new_layer(i, cues)
                ret[str(i)] = ainodex.serialize_layer(layer)
        
        erlay.report("Scoring <%s> took %dms" % 
                (msg['cues'], (time.time() - t) * 1000.0))

        return encode_netstring_fd(ret)
예제 #3
0
파일: simple.py 프로젝트: Zabrane/aino
ainodex.open()

if len(sys.argv) < 2:
	print "Usage: simple [key] [cue]"
	sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
	# Compute token scores
	ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]

print zip(doc_keys, doc_scores)
예제 #4
0
ainodex.open()

if len(sys.argv) < 2:
    print "Usage: simple [key] [cue]"
    sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
    # Compute token scores
    ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]

print zip(doc_keys, doc_scores)