Example #1
    def test_generate_sent_1gram(self):
        ngram = NGram(1, self.sents)
        generator = NGramGenerator(ngram)

        voc = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}

        for i in range(100):
            sent = generator.generate_sent()
            self.assertTrue(set(sent).issubset(voc))
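
The examples on this page only show tests and callers of NGramGenerator; the class itself is not listed. As a point of reference, here is a minimal sketch of the sampling logic the tests imply, using inverse transform sampling over the sorted_probs lists asserted in Example #7 below. The attribute and method names come from the tests; everything else is an assumption, not the library's actual implementation.

import random

class NGramGeneratorSketch:
    """Hypothetical minimal generator; not the real NGramGenerator."""

    def __init__(self, sorted_probs, n):
        # sorted_probs maps an (n-1)-token prefix to a list of
        # (token, probability) pairs, as asserted in test_init_3gram.
        self.sorted_probs = sorted_probs
        self.n = n

    def generate_token(self, prev_tokens):
        # Inverse transform sampling: walk the sorted candidates until
        # the cumulative probability exceeds a uniform random draw.
        r = random.random()
        acc = 0.0
        for token, prob in self.sorted_probs[prev_tokens]:
            acc += prob
            if r < acc:
                return token
        return token  # fall back to the last candidate on float rounding

    def generate_sent(self):
        # Start from n-1 '<s>' markers and sample until '</s>'.
        prev = ('<s>',) * (self.n - 1)
        sent = []
        token = self.generate_token(prev)
        while token != '</s>':
            sent.append(token)
            prev = (prev + (token,))[1:]  # slide the prefix window
            token = self.generate_token(prev)
        return sent

Under this sketch, a bigram generator asked for the prefix ('el',) can only ever return 'gato' when that is the sole continuation, which is exactly what the generate_token tests below assert.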
Example #3
    def test_generate_token(self):
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        for i in range(100):
            # after 'el' always comes 'gato':
            token = generator.generate_token(('el',))
            self.assertEqual(token, 'gato')

            # after 'come' may come 'pescado' or 'salmón'
            token = generator.generate_token(('come',))
            self.assertIn(token, ['pescado', 'salmón'])
Example #4
    def test_generate_token(self):
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        for i in range(100):
            # after 'el' always comes 'gato':
            token = generator.generate_token(('el',))
            self.assertEqual(token, 'gato')

            # after 'come' may come 'pescado' or 'salmón'
            token = generator.generate_token(('come',))
            self.assertIn(token, ['pescado', 'salmón'])
Example #5
    def test_generate_sent_2gram(self):
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        # all the possible generated sentences for 2-grams:
        sents = [
            'el gato come pescado .',
            'la gata come salmón .',
            'el gato come salmón .',
            'la gata come pescado .',
        ]

        for i in range(100):
            sent = generator.generate_sent()
            self.assertIn(' '.join(sent), sents, sent)
Example #7
    def test_init_3gram(self):
        ngram = NGram(3, self.sents)
        generator = NGramGenerator(ngram)

        probs = {
            ('<s>', '<s>'): {'el': 0.5, 'la': 0.5},
            ('<s>', 'el'): {'gato': 1.0},
            ('el', 'gato'): {'come': 1.0},
            ('gato', 'come'): {'pescado': 1.0},
            ('come', 'pescado'): {'.': 1.0},
            ('pescado', '.'): {'</s>': 1.0},
            ('<s>', 'la'): {'gata': 1.0},
            ('la', 'gata'): {'come': 1.0},
            ('gata', 'come'): {'salmón': 1.0},
            ('come', 'salmón'): {'.': 1.0},
            ('salmón', '.'): {'</s>': 1.0},
        }
        sorted_probs = {
            ('<s>', '<s>'): [('el', 0.5), ('la', 0.5)],
            ('<s>', 'el'): [('gato', 1.0)],
            ('el', 'gato'): [('come', 1.0)],
            ('gato', 'come'): [('pescado', 1.0)],
            ('come', 'pescado'): [('.', 1.0)],
            ('pescado', '.'): [('</s>', 1.0)],
            ('<s>', 'la'): [('gata', 1.0)],
            ('la', 'gata'): [('come', 1.0)],
            ('gata', 'come'): [('salmón', 1.0)],
            ('come', 'salmón'): [('.', 1.0)],
            ('salmón', '.'): [('</s>', 1.0)],
        }

        self.assertEqual(dict(generator.probs), probs)
        self.assertEqual(generator.sorted_probs, sorted_probs)
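
The probs and sorted_probs dictionaries asserted above can be derived from raw n-gram counts by conditioning each n-gram on its (n-1)-token prefix. A minimal sketch, mirroring common n-gram implementations; the counts layout is an assumption, and only the two output dictionaries are confirmed by the test:

from collections import defaultdict

def build_probs(counts, n):
    """Turn n-gram/prefix counts into the probs/sorted_probs layout above.

    counts maps token tuples of length n and n-1 to their frequencies
    in the training corpus padded with '<s>' and '</s>' markers.
    """
    probs = defaultdict(dict)
    for tokens, count in counts.items():
        if len(tokens) == n:
            prefix, token = tokens[:-1], tokens[-1]
            probs[prefix][token] = count / counts[prefix]
    # Most probable continuation first; ties broken alphabetically.
    sorted_probs = {
        prefix: sorted(d.items(), key=lambda item: (-item[1], item[0]))
        for prefix, d in probs.items()
    }
    return dict(probs), sorted_probs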
Example #8
    def test_generate_sent_3and4gram(self):
        ngram = NGram(3, self.sents4)
        ngram2 = NGram(4, self.sents4)
        generator = NGramGenerator(ngram)
        generator2 = NGramGenerator(ngram2)

        # all the possible generated sentences for 3 or 4-grams:
        sents = [
            'la casa se construye y el corre y la gata come ensalada',
            'el corre y la gata come pescado y duerme',
            'la casa se construye y el corre y la gata come pescado y duerme',
            'la casa se construye y el corre',
            'la gata come pescado y duerme',
            'el corre y la gata come ensalada',
            'el corre',
            'la gata come ensalada',
        ]

        for i in range(1000):
            sent = generator.generate_sent()
            sent2 = generator2.generate_sent()
            self.assertIn(' '.join(sent), sents)
            self.assertIn(' '.join(sent2), sents)
Example #9
    def test_generate_token_3and4gram(self):
        ngram = NGram(3, self.sents3)
        ngram2 = NGram(4, self.sents3)
        generator = NGramGenerator(ngram)
        generator2 = NGramGenerator(ngram2)

        for i in range(100):
            # after 'come pescado' always comes 'y'
            token = generator.generate_token(('come', 'pescado'))
            self.assertEqual(token, 'y')
            # after 'come pescado y' always comes 'duerme'
            token = generator2.generate_token(('come', 'pescado', 'y'))
            self.assertEqual(token, 'duerme')
            # a sentence may start with 'el' or 'la'
            token = generator.generate_token(('<s>', '<s>'))
            self.assertIn(token, ['el', 'la'])
            token = generator2.generate_token(('<s>', '<s>', '<s>'))
            self.assertIn(token, ['el', 'la'])
Example #10
    def test_init_1gram(self):
        ngram = NGram(1, self.sents)
        generator = NGramGenerator(ngram)

        probs = {
            (): {
                'el': 1 / 12.0,
                'gato': 1 / 12.0,
                'come': 2 / 12.0,
                'pescado': 1 / 12.0,
                '.': 2 / 12.0,
                '</s>': 2 / 12.0,
                'la': 1 / 12.0,
                'gata': 1 / 12.0,
                'salmón': 1 / 12.0,
            }
        }

        self.assertEqual(dict(generator.probs), probs)
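
The 1/12 and 2/12 values above follow directly from token counts: the two training sentences contribute ten word tokens plus one '</s>' marker each, for a total of 12. A standalone check; the corpus here is reconstructed from the expected probabilities and is an assumption about self.sents:

from collections import Counter

sents = [
    'el gato come pescado .'.split(),
    'la gata come salmón .'.split(),
]
# Count every token, closing each sentence with '</s>' (12 tokens total).
tokens = [tok for sent in sents for tok in sent + ['</s>']]
counts = Counter(tokens)
probs = {tok: count / len(tokens) for tok, count in counts.items()}
assert probs['come'] == 2 / 12.0
assert probs['el'] == 1 / 12.0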
Example #11
Options:
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""

import sys
sys.path.append("../../")

import pickle
from docopt import docopt
from languagemodeling.ngram import NGramGenerator

if __name__ == '__main__':

    opts = docopt(__doc__)

    n = int(opts['-n'])
    filename = opts['-i']
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    generator = NGramGenerator(model)

    for _ in range(n):
        sent = generator.generate_sent()
        for token in sent:
            print(token, end=" ")

        print("\n")
Example #12
    n = int(opts['-n'])
    filename = opts['-i']

    # the output will be written in test/output.txt
    file_output = open(os.path.join(DEFAULT_OUTPUT_DIR, 'output.txt'), 'w')
    if filename:
        # open a trained n-gram model file (n in {1, 2, 3, 4}) for reading
        file_model = open(filename, 'rb')
        # ngram is a trained model.
        ngram = pickle.load(file_model)
        # close the file
        file_model.close()
        # create an NGramGenerator instance from the model
        generator = NGramGenerator(ngram)
        print('model loaded')
        for _ in range(n):
            list_sentence = generator.generate_sent()
            # join the tokens with spaces and end the line
            file_output.write(' '.join(list_sentence) + '\n')
    else:
        for i in range(1, 5):
            # open the trained model file (n in {1, 2, 3, 4})
            file_model = open(str(i) + '-gram.txt', 'rb')
            # ngram is a trained model.
            ngram = pickle.load(file_model)
            file_model.close()
            # an instance of NGramGenerator with ngram
Example #13
Generate natural language sentences using a language model.

Usage:
  generate.py -i <file> -n <n>
  generate.py -h | --help

Options:
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""

import pickle
from docopt import docopt
from languagemodeling.ngram import NGramGenerator

if __name__ == '__main__':
    opts = docopt(__doc__)
    # read options
    path = str(opts['-i'])
    n = int(opts['-n'])
    # open and load the model file
    with open(path, 'rb') as f:
        model = pickle.load(f)
    # create generator
    generator = NGramGenerator(model)
    # print sentences as they are generated.
    for _ in range(n):
        print(' '.join(generator.generate_sent()) + "\n")
Example #15
  -h --help     Show this screen.
"""
from docopt import docopt
import pickle

import os.path
import sys
# Add ../../ to PYTHONPATH
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        os.pardir, os.pardir))

from languagemodeling.ngram import NGramGenerator


if __name__ == '__main__':
    opts = docopt(__doc__)
    # load the model
    filename = opts['-i']
    with open(filename, 'rb') as f:
        model = pickle.load(f)
    sys.stderr.write('Loaded model\n')
    # generate
    n = int(opts['-n'])
    generator = NGramGenerator(model)
    sys.stderr.write('Initialized generator\n')
    for i in range(n):
        print('Sentence %s:' % i)
        print(' '.join(generator.generate_sent()))
Example #16
"""Generate natural language sentences using a language model.

Usage:
  generate.py -i <file> -n <n>
  generate.py -h | --help

Options:
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""
from docopt import docopt
import pickle
from languagemodeling.ngram import NGramGenerator

if __name__ == '__main__':
    opts = docopt(__doc__)

    filename = opts['-i']
    f = open(filename, 'rb')
    model = pickle.load(f)
    f.close()

    generator = NGramGenerator(model)

    for _ in range(int(opts['-n'])):
        sent = ' '.join(generator.generate_sent())
        print(sent)
        print("-------------------------------------------------------------")