def learn_unigram(data, verbose=True):
    """Fit a Unigram language model on ``data.train`` and return it.

    When ``verbose`` is true, the function additionally prints the
    vocabulary size, the model's perplexity on ``data.train``, ``data.dev``
    and ``data.test``, and ten sentences sampled from the model, each one
    seeded with a fixed list of prefix words.

    Args:
        data: Object exposing ``train``, ``dev`` and ``test`` corpora.
        verbose: If false, skip all reporting and sampling.

    Returns:
        The fitted ``Unigram`` model.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)
    if not verbose:
        return model

    print("vocab:", len(model.vocab()))
    # Report perplexity on every split; each split is looked up only when
    # its line is about to be printed.
    for label, split in (("train:", "train"), ("dev :", "dev"), ("test :", "test")):
        print(label, model.perplexity(getattr(data, split)))

    from generator import Sampler

    sampler = Sampler(model)
    # Prefix words handed to the sampler, in the order they are reported.
    prefixes = (
        ['The', 'president'],
        ['This', 'university'],
        ['The', 'United', 'States'],
        ['An', 'explosion'],
        ['To', 'be', 'or', 'to'],
        ['This', 'is', 'awesome'],
        ['I', 'am', 'sorry'],
        ['Today', 'the', 'chair', 'of'],
        ['Hello', 'I', 'came', 'from'],
        ['I', 'major', 'in', 'Computer', 'Science'],
    )
    for number, prefix in enumerate(prefixes, start=1):
        words = sampler.sample_sentence(prefix)
        print(f"sample {number}: ", " ".join(str(word) for word in words))
    return model
def learn_unigram(data, verbose=True):
    """Fit a Unigram language model on ``data.train`` and return it.

    When ``verbose`` is true, the vocabulary size and the model's
    perplexity on ``data.train``, ``data.dev`` and ``data.test`` are
    printed. This variant does not sample any sentences.

    Args:
        data: Object exposing ``train``, ``dev`` and ``test`` corpora.
        verbose: If false, nothing is printed.

    Returns:
        The fitted ``Unigram`` model.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)

    if verbose:
        vocab_size = len(model.vocab())
        print("vocab:", vocab_size)

        # Evaluate and report one split at a time: train, then dev, then test.
        train_perplexity = model.perplexity(data.train)
        print("train:", train_perplexity)
        dev_perplexity = model.perplexity(data.dev)
        print("dev :", dev_perplexity)
        test_perplexity = model.perplexity(data.test)
        print("test :", test_perplexity)

    return model
def learn_unigram(data):
    """Fit a Unigram language model on ``data.train`` and return it.

    The function always prints the vocabulary size, the model's perplexity
    on ``data.train``, ``data.dev`` and ``data.test``, and two sentences
    sampled from the model with an empty prefix and ``max_length=20``.

    Args:
        data: Object exposing ``train``, ``dev`` and ``test`` corpora.

    Returns:
        The fitted ``Unigram`` model.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)

    print("vocab:", len(model.vocab()))
    # Report perplexity on every split, fetching each split lazily so the
    # access order stays train -> dev -> test.
    for label, split in (("train:", "train"), ("dev :", "dev"), ("test :", "test")):
        print(label, model.perplexity(getattr(data, split)))

    from generator import Sampler

    sampler = Sampler(model)
    remaining = 2
    while remaining > 0:
        words = sampler.sample_sentence([], max_length=20)
        print("sample: ", " ".join(map(str, words)))
        remaining -= 1
    return model
def learn_unigram(data, verbose=True):
    """Fit a Unigram language model on ``data.train`` and return it.

    When ``verbose`` is true, the function additionally prints the
    vocabulary size, the model's perplexity on ``data.train``, ``data.dev``
    and ``data.test``, and two sampled sentences seeded with ``['The']``
    and ``['They']`` respectively.

    Args:
        data: Object exposing ``train``, ``dev`` and ``test`` corpora.
        verbose: If false, skip all reporting and sampling.

    Returns:
        The fitted ``Unigram`` model.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)

    if verbose:
        print("vocab:", len(model.vocab()))

        # Perplexity report, one line per split in train/dev/test order.
        split_labels = {"train": "train:", "dev": "dev :", "test": "test :"}
        for split, label in split_labels.items():
            print(label, model.perplexity(getattr(data, split)))

        from generator import Sampler

        sampler = Sampler(model)
        for number, prefix in enumerate((['The'], ['They']), start=1):
            sentence = " ".join(str(word) for word in sampler.sample_sentence(prefix))
            print(f"sample {number}: ", sentence)

    return model