Esempio n. 1
0
    def test_generate_sent_3and4gram(self):
        """Sentences sampled from 3-gram and 4-gram models trained on
        self.sents4 must be among the sentences that corpus can produce."""
        ngram = NGram(3, self.sents4)
        ngram2 = NGram(4, self.sents4)
        generator = NGramGenerator(ngram)
        generator2 = NGramGenerator(ngram2)

        # all the possible generated sentences for 3 or 4-grams
        # (duplicates in the list are harmless for membership tests):
        sents = [
            'la casa se construye y el corre y la gata come ensalada',
            'el corre y la gata come pescado y duerme',
            'la casa se construye y el corre y la gata come ensalada',
            'la casa se construye y el corre y la gata come pescado y duerme',
            'la casa se construye y el corre',
            'la gata come pescado y duerme',
            'el corre y la gata come ensalada',
            'el corre',
            'la gata come ensalada',
            'la casa se construye y el corre',
            'la gata come pescado y duerme',
        ]

        # Generation is random, so draw many samples for confidence.
        for _ in range(1000):
            sent = generator.generate_sent()
            sent2 = generator2.generate_sent()
            # assertIn reports the offending sentence on failure,
            # unlike assertTrue(x in y) which only prints "False".
            self.assertIn(' '.join(sent), sents)
            self.assertIn(' '.join(sent2), sents)
    def test_generate_sent_1gram(self):
        """Every sentence sampled from a unigram model must use only
        tokens from the training vocabulary."""
        ngram = NGram(1, self.sents)
        generator = NGramGenerator(ngram)

        voc = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}

        # Generation is random, so draw many samples for confidence.
        for _ in range(100):
            sent = generator.generate_sent()
            # Pass the sentence as msg so a failure shows the bad tokens
            # instead of a bare "False is not true".
            self.assertTrue(set(sent).issubset(voc), sent)
Esempio n. 3
0
    def test_generate_sent_1gram(self):
        """Unigram-model samples may only contain vocabulary tokens."""
        ngram = NGram(1, self.sents)
        generator = NGramGenerator(ngram)

        voc = {'el', 'gato', 'come', 'pescado', '.', 'la', 'gata', 'salmón'}

        for _ in range(100):
            sent = generator.generate_sent()
            # On failure, report exactly which tokens fell outside voc.
            self.assertTrue(set(sent).issubset(voc), set(sent) - voc)
    def test_generate_sent_2gram(self):
        """Sentences sampled from a bigram model must be one of the four
        sentences the training corpus licenses."""
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        # all the possible generated sentences for 2-grams:
        sents = [
            'el gato come pescado .',
            'la gata come salmón .',
            'el gato come salmón .',
            'la gata come pescado .',
        ]

        for _ in range(100):
            sent = generator.generate_sent()
            # assertIn gives a clearer failure message than
            # assertTrue(x in y, msg).
            self.assertIn(' '.join(sent), sents, sent)
Esempio n. 5
0
    def test_generate_sent_2gram(self):
        """Bigram-model samples must be among the corpus-licensed sentences."""
        ngram = NGram(2, self.sents)
        generator = NGramGenerator(ngram)

        # all the possible generated sentences for 2-grams:
        sents = [
            'el gato come pescado .',
            'la gata come salmón .',
            'el gato come salmón .',
            'la gata come pescado .',
        ]

        for _ in range(100):
            sent = generator.generate_sent()
            # assertIn pinpoints the offending sentence on failure.
            self.assertIn(' '.join(sent), sents, sent)
Esempio n. 6
0
Options:
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""

import sys
sys.path.append("../../")

import pickle
from docopt import docopt
from languagemodeling.ngram import NGramGenerator

if __name__ == '__main__':

    opts = docopt(__doc__)

    # Read the command-line options.
    n = int(opts['-n'])
    model_path = str(opts['-i'])

    # Load the pickled language model. The context manager closes the
    # file even if unpickling raises (the original leaked the handle).
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    generator = NGramGenerator(model)

    # Print each sentence: tokens space-separated, a trailing space
    # after the last token, then a blank line — byte-identical to the
    # original token-by-token printing.
    for _ in range(n):
        sent = generator.generate_sent()
        print(' '.join(sent), end=' \n\n')
Esempio n. 7
0
    # The generated output is written to test/output.txt.
    file_output = open(os.path.join(DEFAULT_OUTPUT_DIR, 'output.txt'), 'w')
    if filename:
        # A model file was given explicitly: load that single model.
        # Open the pickled model for binary reading.
        file_model = open(filename, 'rb')
        # ngram is the trained model reconstructed from the pickle.
        file_model = open(filename, 'rb') if False else file_model  # (placeholder removed)
        ngram = pickle.load(file_model)
        # Close the model file once loaded.
        file_model.close()
        # Build a sentence generator from the loaded model.
        generator = NGramGenerator(ngram)
        print('have just upload')
        for _ in range(0, n):
            list_sentence = generator.generate_sent()
            # Join the token list with spaces between words.
            # NOTE(review): sentences are written back-to-back with no
            # separator between them — confirm this is intended.
            file_output.write(' '.join(list_sentence))
        # Put one EOL after all sentences (not one per sentence).
        file_output.write('\r\n')
    else:
        # No filename given: load the default per-n model files.
        for i in range(1, 5):
            # NOTE(review): only '1-gram.txt' .. '4-gram.txt' are read
            # here, despite the original comment mentioning n up to 8.
            file_model = open(str(i) + '-gram.txt', 'rb')
            # ngram is the trained model reconstructed from the pickle.
            ngram = pickle.load(file_model)
            file_model.close()
            # Build a sentence generator from the loaded model.
            generator = NGramGenerator(ngram)
            # Write the section title "i-Gram".
            file_output.write(str(i) + '-Gram')
Esempio n. 8
0
Generate natural language sentences using a language model.

Usage:
  generate.py -i <file> -n <n>
  generate.py -h | --help

Options:
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""

import pickle
from docopt import docopt
from languagemodeling.ngram import NGramGenerator

if __name__ == '__main__':
    opts = docopt(__doc__)
    # Read the command-line options.
    path = str(opts['-i'])
    n = int(opts['-n'])
    # Load the pickled model. The context manager closes the file even
    # if pickle.load raises (the original never closed it), and the
    # rename avoids the uninformative local name 'file'.
    with open(path, 'rb') as model_file:
        model = pickle.load(model_file)
    # Create the sentence generator.
    generator = NGramGenerator(model)
    # Print each sentence followed by a blank line (the explicit "\n"
    # plus print's own newline), exactly as before.
    for _ in range(n):
        print(' '.join(generator.generate_sent()) + "\n")
Esempio n. 9
0
from docopt import docopt
from languagemodeling.ngram import NGramGenerator


if __name__ == '__main__':
    # Parse the command-line options.
    opts = docopt(__doc__)

    # Read the supplied option values.
    model_file = str(opts['-i'])
    num_sents = int(opts['-n'])

    # Reconstruct the language model from its pickled byte
    # representation. 'with' guarantees the file is closed even if
    # pickle.load raises (the original only closed it on success).
    with open(model_file, "rb") as f:
        modelo = pickle.load(f)

    # Instantiate an NGramGenerator with the loaded model.
    generador = NGramGenerator(modelo)

    # Generate a total of "num_sents" sentences.
    for _ in range(num_sents):
        sent = generador.generate_sent()
        # Join all tokens, separated by single spaces.
        sent = " ".join(sent)
        print(sent)
Esempio n. 10
0
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""

import sys
sys.path.append("../../")

import pickle
from docopt import docopt
from languagemodeling.ngram import NGramGenerator


if __name__ == '__main__':

    opts = docopt(__doc__)

    # Read the command-line options.
    n = int(opts['-n'])
    model_path = str(opts['-i'])

    # Load the pickled language model; the context manager closes the
    # file even when unpickling fails (the original leaked the handle).
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    generator = NGramGenerator(model)

    # Emit each sentence with space-separated tokens, a trailing space,
    # then a blank line — identical output to the original per-token loop.
    for _ in range(n):
        sent = generator.generate_sent()
        print(' '.join(sent), end=' \n\n')
Esempio n. 11
0
  -h --help     Show this screen.
"""
from docopt import docopt
import pickle

import os.path
import sys
# Add ../../ to PYTHONPATH
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        os.pardir, os.pardir))

from languagemodeling.ngram import NGramGenerator


if __name__ == '__main__':
    args = docopt(__doc__)

    # Deserialize the pickled language model from disk.
    with open(args['-i'], 'rb') as model_file:
        lm = pickle.load(model_file)
    sys.stderr.write('Loaded model\n')

    # Build the sentence generator; progress goes to stderr so stdout
    # carries only the generated text.
    sent_count = int(args['-n'])
    sent_generator = NGramGenerator(lm)
    sys.stderr.write('Initialized generator\n')

    # Emit each numbered sentence on stdout.
    for idx in range(sent_count):
        print('Sentence %s:' % idx)
        print(' '.join(sent_generator.generate_sent()))
Esempio n. 12
0
"""Generate natural language sentences using a language model.

Usage:
  generate.py -i <file> -n <n>
  generate.py -h | --help
Options:
  -i <file>     Language model file.
  -n <n>        Number of sentences to generate.
  -h --help     Show this screen.
"""
from docopt import docopt
import pickle
from languagemodeling.ngram import NGramGenerator

if __name__ == '__main__':
    opts = docopt(__doc__)

    # Load the pickled language model. 'with' guarantees the file is
    # closed even if pickle.load raises (the original's manual close
    # would be skipped on an exception).
    filename = opts['-i']
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    generator = NGramGenerator(model)

    # Print each generated sentence followed by a visual separator line.
    for _ in range(int(opts['-n'])):
        sent = ' '.join(generator.generate_sent())
        print(sent)
        print("-------------------------------------------------------------")