Example #1
0
def index(text):
    """Classify ``text`` with per-corpus KenLM models and format the result.

    One model is built from ``lm/<code>.binary`` for every ``code`` in
    ``train.corp``. The models and the lower-cased text are handed to
    ``train.language``.

    Args:
        text: The string to classify; it is lower-cased before scoring.

    Returns:
        The string ``"(<label>,<value>)"``, where ``<label>`` is
        ``dict(train.valid)[results[0]]`` and ``<value>`` is
        ``str(results[1])`` for the ``results`` returned by
        ``train.language``.

    Raises:
        KeyError: If ``results[0]`` is not a key of ``dict(train.valid)``.
    """
    # Build a real list instead of a lazy ``map`` object so the models can be
    # iterated more than once (or indexed) on Python 3 as well as Python 2.
    # NOTE(review): the models are rebuilt on every call -- consider caching
    # them if this function is called frequently.
    models = [kenlm.LanguageModel('lm/' + code + ".binary")
              for code in train.corp]
    # Presumably maps corpus codes to display names -- confirm against
    # the definition of train.valid.
    langs = dict(train.valid)
    results = train.language(models, text.lower())
    return "(" + langs[results[0]] + "," + str(results[1]) + ")"
Example #2
0
def index(text):
    """Run language identification on ``text`` and return a formatted pair.

    A KenLM model is created from ``lm/<code>.binary`` for each ``code`` in
    ``train.corp``; ``train.language`` is then called with those models and
    the lower-cased input.

    Args:
        text: Input string; converted to lower case before classification.

    Returns:
        ``"(" + label + "," + value + ")"`` where ``label`` is looked up in
        ``dict(train.valid)`` using the first element of the
        ``train.language`` result, and ``value`` is ``str()`` of the second
        element.

    Raises:
        KeyError: If the first element of the result is missing from
            ``dict(train.valid)``.
    """
    # A list comprehension (rather than ``map``) guarantees a reusable
    # sequence on Python 3, where ``map`` yields a one-shot iterator.
    # NOTE(review): every call re-creates all models; cache them if this
    # turns out to be a hot path.
    models = [
        kenlm.LanguageModel('lm/' + code + ".binary")
        for code in train.corp
    ]
    # Presumably code -> human-readable language name; verify in train.valid.
    langs = dict(train.valid)
    results = train.language(models, text.lower())
    return "(" + langs[results[0]] + "," + str(results[1]) + ")"
Example #3
0
def test():
    """Count how often ``train.language`` recovers each corpus label.

    For every corpus name ``c`` in the module-level ``corp``, the file
    ``testcorpus/<c>`` is read as UTF-8 and split on whitespace. 1000
    distinct start positions are sampled; from each one a run of 1 to 24
    consecutive tokens is joined with single spaces and classified with the
    module-level ``models``.

    Returns:
        dict: Maps each corpus name to a ``(right, wrong)`` tuple counting
        the samples whose predicted label did / did not equal the corpus
        name.

    Raises:
        ValueError: Raised by ``random.sample`` when a corpus contains fewer
            than 1024 whitespace-separated tokens.
    """
    counts = {}
    for c in corp:
        right = 0
        wrong = 0
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with io.open('testcorpus/' + c, encoding='utf-8') as corpus_file:
            text = corpus_file.read()
        # NOTE(review): tokens are whitespace-delimited. That is a poor fit
        # for logographic scripts such as Chinese, which are not
        # space-separated -- such corpora may need a different tokenizer.
        text = text.split()
        # Start positions stop 24 tokens before the end, so a window of up
        # to 24 tokens never runs past the end of the corpus.
        for i in random.sample(range(1, len(text) - 23), 1000):
            length = random.randint(1, 24)
            randogram = text[i:i + length]
            ans = train.language(models, ' '.join(randogram))[0]
            if ans != c:
                wrong += 1
            else:
                right += 1
        # Record the totals once per corpus rather than on every sample.
        counts[c] = (right, wrong)
    return counts
Example #4
0
def test():
    """Evaluate ``train.language`` on random word windows from each corpus.

    Each corpus ``c`` listed in the module-level ``corp`` is read from
    ``testcorpus/<c>`` (UTF-8) and split on whitespace. For 1000 distinct
    sampled start indices, between 1 and 24 consecutive tokens are joined
    with spaces and passed, together with the module-level ``models``, to
    ``train.language``; the first element of its result is compared with
    ``c``.

    Returns:
        dict: ``{corpus_name: (right, wrong)}`` where ``right`` counts
        predictions equal to the corpus name and ``wrong`` counts the rest.

    Raises:
        ValueError: From ``random.sample`` if a corpus has fewer than 1024
            whitespace-separated tokens.
    """
    counts = {}
    for c in corp:
        right = 0
        wrong = 0
        # ``with`` closes the corpus file as soon as it has been read.
        with io.open('testcorpus/' + c, encoding='utf-8') as handle:
            text = handle.read()
        # NOTE(review): splitting on whitespace is not appropriate for
        # logographic scripts such as Chinese; confirm whether those
        # corpora are pre-segmented or need a dedicated tokenizer.
        text = text.split()
        # The upper bound keeps every window (at most 24 tokens long)
        # inside the token list.
        for i in random.sample(range(1, len(text) - 23), 1000):
            window = random.randint(1, 24)
            randogram = text[i:i + window]
            ans = train.language(models, ' '.join(randogram))[0]
            if ans != c:
                wrong += 1
            else:
                right += 1
        # Store the tally after all samples for this corpus are scored.
        counts[c] = (right, wrong)
    return counts
Example #5
0
def hello(text):
    """Classify ``text`` with ``train.models`` and format the outcome.

    Returns:
        The string ``"(<first>, <second>)"`` built from the first two
        elements of the ``train.language`` result; the second element is
        passed through ``str()``.
    """
    outcome = train.language(train.models, text)
    label = outcome[0]
    value = str(outcome[1])
    return "(" + label + ", " + value + ")"
Example #6
0
def hello(text):
    """Return the ``train.language`` result for ``text`` as ``"(a, b)"``.

    ``train.models`` is used as the model set. The first element of the
    result is concatenated as-is; the second is converted with ``str()``.
    """
    prediction = train.language(train.models, text)
    pieces = ["(", prediction[0], ", ", str(prediction[1]), ")"]
    return "".join(pieces)