def learn_unigram(data, verbose=True):
    """Fit a ``Unigram`` model on ``data.train`` and return it.

    When ``verbose`` is truthy the function also prints, in this order:
    the vocabulary size, the model's perplexity on ``data.train``,
    ``data.dev`` and ``data.test``, and ten results of
    ``Sampler.sample_sentence`` called with fixed word lists (presumably
    sentence prefixes the sampler continues -- confirm against ``generator``).
    When ``verbose`` is falsy nothing is printed and nothing is evaluated.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)
    if not verbose:
        return model

    print("vocab:", len(model.vocab()))
    # Splits are fetched lazily with getattr so each one is read right before
    # its perplexity is computed, same as three separate print statements.
    for label, split_name in (
        ("train:", "train"),
        ("dev  :", "dev"),
        ("test :", "test"),
    ):
        print(label, model.perplexity(getattr(data, split_name)))

    # Imported only on the verbose path, after the perplexity report.
    from generator import Sampler

    sampler = Sampler(model)
    prefixes = (
        ('The', 'president'),
        ('This', 'university'),
        ('The', 'United', 'States'),
        ('An', 'explosion'),
        ('To', 'be', 'or', 'to'),
        ('This', 'is', 'awesome'),
        ('I', 'am', 'sorry'),
        ('Today', 'the', 'chair', 'of'),
        ('Hello', 'I', 'came', 'from'),
        ('I', 'major', 'in', 'Computer', 'Science'),
    )
    for number, prefix in enumerate(prefixes, start=1):
        # Hand the sampler a fresh list each time, as the literal lists did.
        tokens = sampler.sample_sentence(list(prefix))
        print("sample {}: ".format(number), " ".join(map(str, tokens)))
    return model
Exemple #2
0
def learn_unigram(data, verbose=True):
    """Fit a ``Unigram`` model on ``data.train`` and return it.

    When ``verbose`` is truthy the function also prints the vocabulary size
    followed by the model's perplexity on ``data.train``, ``data.dev`` and
    ``data.test``. This variant does not sample any sentences. When
    ``verbose`` is falsy nothing is printed and nothing is evaluated.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)
    if not verbose:
        return model

    print("vocab:", len(model.vocab()))
    # Splits are fetched lazily with getattr so each one is read right before
    # its perplexity is computed.
    for label, split_name in (
        ("train:", "train"),
        ("dev  :", "dev"),
        ("test :", "test"),
    ):
        print(label, model.perplexity(getattr(data, split_name)))
    return model
Exemple #3
0
def learn_unigram(data):
    """Fit a ``Unigram`` model on ``data.train``, report on it, and return it.

    Always prints the vocabulary size, the model's perplexity on
    ``data.train``, ``data.dev`` and ``data.test``, and then two results of
    ``Sampler.sample_sentence`` called with an empty word list and
    ``max_length=20``.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)

    print("vocab:", len(model.vocab()))
    # Splits are fetched lazily with getattr so each one is read right before
    # its perplexity is computed.
    for label, split_name in (
        ("train:", "train"),
        ("dev  :", "dev"),
        ("test :", "test"),
    ):
        print(label, model.perplexity(getattr(data, split_name)))

    # Imported after the perplexity report, right before it is needed.
    from generator import Sampler

    sampler = Sampler(model)
    remaining = 2
    while remaining > 0:
        tokens = sampler.sample_sentence([], max_length=20)
        print("sample: ", " ".join(map(str, tokens)))
        remaining -= 1
    return model
def learn_unigram(data, verbose=True):
    """Fit a ``Unigram`` model on ``data.train`` and return it.

    When ``verbose`` is truthy the function also prints the vocabulary size,
    the model's perplexity on ``data.train``, ``data.dev`` and ``data.test``,
    and two results of ``Sampler.sample_sentence`` called with the one-word
    lists ``['The']`` and ``['They']`` (presumably used as sentence starts --
    confirm against ``generator``). When ``verbose`` is falsy nothing is
    printed and nothing is evaluated.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)
    if not verbose:
        return model

    print("vocab:", len(model.vocab()))
    # Splits are fetched lazily with getattr so each one is read right before
    # its perplexity is computed.
    for label, split_name in (
        ("train:", "train"),
        ("dev  :", "dev"),
        ("test :", "test"),
    ):
        print(label, model.perplexity(getattr(data, split_name)))

    # Imported only on the verbose path, after the perplexity report.
    from generator import Sampler

    sampler = Sampler(model)
    for number, first_word in enumerate(('The', 'They'), start=1):
        tokens = sampler.sample_sentence([first_word])
        print("sample {}: ".format(number), " ".join(map(str, tokens)))
    return model