예제 #1
0
파일: ainoapi.py 프로젝트: Zabrane/aino
def _to_xid(t):
        if type(t) == str:
                xid = ainodex.token2ixeme(t)
        elif type(t) == int:
                xid = t
        else:
                raise ValueError("Argument not a string or integer")
        if not xid:
                raise KeyError("No such ixeme")
        return xid
예제 #2
0
def _to_xid(t):
    if type(t) == str:
        xid = ainodex.token2ixeme(t)
    elif type(t) == int:
        xid = t
    else:
        raise ValueError("Argument not a string or integer")
    if not xid:
        raise KeyError("No such ixeme")
    return xid
예제 #3
0
def find_id(token):
    xids = [ainodex.token2ixeme(w) for w in re.split('(?u)\W+', token) if w]
    return filter(lambda x: x, xids)
예제 #4
0
파일: simple.py 프로젝트: Zabrane/aino
import sys, array
import ainodex

# Example script: rank the segments matching a key token against the
# co-occurrence layers built from a cue token.
ainodex.open()

# Both sys.argv[1] (key) and sys.argv[2] (cue) are read below, so two
# positional arguments are required.
if len(sys.argv) < 3:
    print("Usage: simple [key] [cue]")
    sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
# Single-argument print(...) produces the same output on Python 2 and 3.
print("KEYS %s CUES %s" % (keys, cues))

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print("%s occurs in %d segments" % (sys.argv[1], hits_len))
print("%s occurs in %d segments" % (sys.argv[2], cues_len))

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
    # Compute token scores
    ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
# NOTE(review): `ranked` is reinterpreted twice -- as unsigned ints and as
# floats -- taking the even-indexed items from the first view and the
# odd-indexed items from the second (up to 10 of each). This suggests a
# packed buffer of interleaved (key, score) pairs; confirm against
# ainodex.rank.
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]
예제 #5
0
import sys, array
import ainodex

# Demo: look up a key token and a cue token, then rank the key's hits
# using layers derived from the cue's hits.
ainodex.open()

# Two command-line arguments (key and cue) are consumed further down, so
# anything fewer than three argv entries is a usage error.
if len(sys.argv) < 3:
    print("Usage: simple [key] [cue]")
    sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
# Formatted into one string so print(...) behaves identically whether it
# is the Python 2 statement or the Python 3 function.
print("KEYS %s CUES %s" % (keys, cues))

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print("%s occurs in %d segments" % (sys.argv[1], hits_len))
print("%s occurs in %d segments" % (sys.argv[2], cues_len))

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
    # Compute token scores
    ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
# NOTE(review): the same `ranked` buffer is viewed as unsigned ints for the
# keys (even positions) and as floats for the scores (odd positions), at
# most 10 entries each -- presumably interleaved (key, score) pairs;
# verify against ainodex.rank.
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]
예제 #6
0
파일: make_query.py 프로젝트: Zabrane/aino
def find_id(token):
        xids = [ainodex.token2ixeme(w) for w in re.split('(?u)\W+', token) if w]
        return filter(lambda x: x, xids)