Example 1
import pickle
import main
import nltk

names = 'tagset known_words q_values e_values'.split()
objects = {}
for name in names:
    with open('parameters/' + name + '.pkl', 'rb') as object_file:
        objects[name] = pickle.load(object_file)

prompt = 'Sentence > '

input_string = None
while input_string not in ('q', 'quit', 'exit'):
    if not input_string:
        # first pass: tag a built-in demo sentence instead of prompting
        input_string = 'Enter an English sentence to tag its tokens with the respective parts of speech -- this is an example.'
        print(prompt + input_string)
    else:
        input_string = input(prompt)
    sentence = nltk.word_tokenize(input_string)
    tagged = main.tag_viterbi(sentence, objects['tagset'],
                              objects['known_words'], objects['q_values'],
                              objects['e_values'])
    print('Tagged: ' + " ".join(["{0}/{1}".format(*x) for x in tagged]))
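
Both examples call main.tag_viterbi(sentence, tagset, known_words, q_values, e_values), whose implementation is not shown here. The sketch below is a minimal bigram-HMM Viterbi decoder with that interface, under these assumptions (not confirmed by the source): q_values maps tag pairs (prev_tag, tag) to transition probabilities, e_values maps (word, tag) pairs to emission probabilities, and words outside known_words share a single '<UNK>' emission entry. The real main module may differ, for instance by using a trigram model.

import math

def tag_viterbi(sentence, tagset, known_words, q_values, e_values,
                start='<s>', unknown='<UNK>'):
    """Bigram-HMM Viterbi decoding sketch (interface assumed):
    return [(word, tag), ...] for the most probable tag sequence."""
    EPS = 1e-12  # probability floor for unseen transitions/emissions

    def emission(word, tag):
        w = word if word in known_words else unknown
        return e_values.get((w, tag), EPS)

    # best[tag]: log-probability of the best path ending in `tag`;
    # backptrs[i][tag]: the previous tag on that best path
    best, backptrs = {start: 0.0}, []
    for word in sentence:
        new_best, new_back = {}, {}
        for tag in tagset:
            em = math.log(emission(word, tag))
            score, prev_tag = max(
                (lp + math.log(q_values.get((pt, tag), EPS)) + em, pt)
                for pt, lp in best.items())
            new_best[tag], new_back[tag] = score, prev_tag
        best = new_best
        backptrs.append(new_back)

    # recover the best final tag, then walk the back-pointers
    tag = max(best, key=best.get)
    tags = [tag]
    for back in reversed(backptrs[1:]):
        tag = back[tag]
        tags.append(tag)
    return list(zip(sentence, reversed(tags)))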
Example 2
""" model parameters """
p = {}
for n in 'e_values known_words q_values tagset'.split():
    with open('parameters/' + n + '.pkl', 'rb') as f:
        p[n] = pickle.load(f)

""" sentences """
s = [] 

for t in t:

    print('Downloading and processing summary of "' + t + '" ...')

    """ list of summary word-tokenized sentences """
    ss = [nltk.word_tokenize(s) for s in z.tokenize(wi.summary(t))]
    
    s.extend(list(filter(c, ss)))

print("Number of selected sentences: {}".format(len(s)))

for s0 in [" ".join(l) for l in s]:
    print(s0)

for tagg in [main.tag_viterbi(sen,
    p['tagset'],
    p['known_words'],
    p['q_values'],
    p['e_values']) for sen in s]:
    print(" ".join(map(lambda z: "/".join(map(str,z)),tagg)))