Ejemplo n.º 1
0
import sys

from pynlpl.formats.sonar import Corpus
from pynlpl.lm.lm import SimpleLanguageModel

# Feed every sentence of a corpus into a SimpleLanguageModel and save the
# resulting model to a file.
#
#syntax: ./make_sonar_lm.py sonar_dir output_file [n] [category]
#
# n         integer handed to SimpleLanguageModel (presumably the n-gram
#           order -- confirm against pynlpl); defaults to 3 when omitted
# category  value handed to Corpus as its third argument; defaults to ""

USAGE = "syntax: ./make_sonar_lm.py sonar_dir output_file [n] [category]"

# Both positional arguments are mandatory; report that instead of failing
# with a bare IndexError.
if len(sys.argv) < 3:
    sys.stderr.write(USAGE + "\n")
    sys.exit(2)

sonardir = sys.argv[1]
outputfile = sys.argv[2]

n = 3
if len(sys.argv) > 3:
    try:
        n = int(sys.argv[3])
    except ValueError:
        # A malformed n used to be silently replaced by the default (and the
        # category argument was dropped along with it); fail loudly instead.
        sys.stderr.write("n must be an integer, got %r\n" % sys.argv[3])
        sys.stderr.write(USAGE + "\n")
        sys.exit(2)

restrictcollection = sys.argv[4] if len(sys.argv) > 4 else ""

lm = SimpleLanguageModel(n)

for doc in Corpus(sonardir, 'tok', restrictcollection):
    for sentence_id, sentence in doc.sentences():
        # Progress output: one sentence id per line.
        # The parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print(sentence_id)
        # Each token unpacks into four fields; only the word is kept.
        # Underscore-prefixed names avoid shadowing the builtin `id`.
        words = [word for word, _id, _pos, _lemma in sentence]
        lm.append(words)
lm.save(outputfile)




Ejemplo n.º 2
0
#!/usr/bin/env python
#-*- coding:utf-8 -*-

import sys
import os

# sys.path[0] is the directory containing this script. Append the directory
# two levels above it to the import path before importing pynlpl below
# (presumably the root of the pynlpl source tree -- confirm against the
# repository layout), and store the same path in the PYTHONPATH environment
# variable. Note that this replaces any PYTHONPATH value already set.
sys.path.append(sys.path[0] + '/../..')
os.environ['PYTHONPATH'] = sys.path[0] + '/../..'

from pynlpl.lm.lm import SimpleLanguageModel

# Load a saved language model and print the score of a sentence whose words
# are given as separate command-line arguments.
#
#syntax: ./query_lm.py lm_file sentence

# The model file is mandatory; report that instead of failing with a bare
# IndexError.
if len(sys.argv) < 2:
    sys.stderr.write("syntax: ./query_lm.py lm_file sentence\n")
    sys.exit(2)

lmfile = sys.argv[1]

lm = SimpleLanguageModel()
lm.load(lmfile)
# Everything after the model file is passed on as the list of sentence words.
# The parenthesised single-argument form prints identically on Python 2 and
# Python 3.
print(lm.scoresentence(sys.argv[2:]))