Python Unigram Beispiele

Programmiersprache: Python

Namespace / Paketname: lm

Klasse / Typ: Unigram

Beispiele auf hotexamples.com: 5

Python Unigram – 5 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse lm.Unigram, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um uns zu helfen, ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Unigram(6)

fit_corpus(5)

perplexity(4)

vocab(4)

Beispiel #1

Datei anzeigen

Datei: data_generator.py Projekt: Rshcaroline/UCSD-CSE256-Statistical-NLP

def learn_unigram(data, verbose=True):
    """Fit a Unigram model on ``data.train`` and return it.

    When ``verbose`` is true, the vocabulary size and the perplexity on the
    train, dev and test splits are printed, followed by ten sentences drawn
    from a ``Sampler`` built on the model, each started from a fixed prefix.
    When ``verbose`` is false nothing is printed and no sampler is created.
    """
    from lm import Unigram

    unigram = Unigram()
    unigram.fit_corpus(data.train)
    if not verbose:
        return unigram

    print("vocab:", len(unigram.vocab()))
    # Report perplexity per split; attributes are looked up one at a time so
    # the splits are read and scored in train / dev / test order.
    for label, split_name in (("train:", "train"), ("dev  :", "dev"), ("test :", "test")):
        print(label, unigram.perplexity(getattr(data, split_name)))

    from generator import Sampler

    sampler = Sampler(unigram)
    # Prefixes handed to the sampler, in the order they are reported.
    prefixes = (
        ['The', 'president'],
        ['This', 'university'],
        ['The', 'United', 'States'],
        ['An', 'explosion'],
        ['To', 'be', 'or', 'to'],
        ['This', 'is', 'awesome'],
        ['I', 'am', 'sorry'],
        ['Today', 'the', 'chair', 'of'],
        ['Hello', 'I', 'came', 'from'],
        ['I', 'major', 'in', 'Computer', 'Science'],
    )
    for number, prefix in enumerate(prefixes, start=1):
        sentence = " ".join(str(token) for token in sampler.sample_sentence(prefix))
        print(f"sample {number}: ", sentence)
    return unigram

Beispiel #2

Datei anzeigen

def learn_unigram(data):
    """Fit a Unigram model on ``data.train``, print its vocabulary size and return it.

    Only the vocabulary size is reported here; this variant performs no
    perplexity evaluation and generates no sample sentences.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)
    vocab_size = len(model.vocab())
    print("vocab:", vocab_size)
    # NOTE: reporting perplexity on data.train / data.dev / data.test and
    # printing three sentences sampled via generator.Sampler (empty prefix)
    # is intentionally disabled in this variant.
    return model

Beispiel #3

Datei anzeigen

def learn_unigram(data, verbose=True):
    """Fit a Unigram model on ``data.train`` and return it.

    When ``verbose`` is true, the vocabulary size and the model's perplexity
    on the train, dev and test splits are printed; otherwise the function is
    silent.
    """
    from lm import Unigram

    model = Unigram()
    model.fit_corpus(data.train)
    if not verbose:
        return model

    print("vocab:", len(model.vocab()))
    # Splits are read and scored one at a time, in train / dev / test order.
    for label, split_name in (("train:", "train"), ("dev  :", "dev"), ("test :", "test")):
        print(label, model.perplexity(getattr(data, split_name)))
    return model

Beispiel #4

Datei anzeigen

    incl_eos determines whether the space of words should include EOS or not.
    """
        wps = []
        tot = -np.inf  # this is the log (total mass)
        for w in self.lm.vocab():
            if not incl_eos and w == "END_OF_SENTENCE":
                continue
            lp = self.lm.cond_logprob(w, prev, 0)
            wps.append([w, lp / self.temp])
            tot = np.logaddexp2(lp / self.temp, tot)
        p = random.random()
        word = random.choice(wps)[0]
        s = -np.inf  # running mass
        for w, lp in wps:
            s = np.logaddexp2(s, lp)
            if p < pow(2, s - tot):
                word = w
                break
        return word


if __name__ == "__main__":
    # Smoke run: fit a Unigram model on a one-sentence corpus, show the
    # fitted model, then print ten sentences sampled from an empty prefix.
    from lm import Unigram

    corpus = [["sam", "i", "am"]]
    unigram = Unigram()
    unigram.fit_corpus(corpus)
    print(unigram.model)

    sampler = Sampler(unigram)
    for i in range(10):
        sentence = " ".join(map(str, sampler.sample_sentence([])))
        print(i, ":", sentence)

Beispiel #5

Datei anzeigen

from data import run_model
from lm import Unigram, NgramNoUnk, NgramUnk
import sys


def check_params(params):
  """Validate the command-line argument list, exiting on misuse.

  ``params`` is expected to look like ``sys.argv``: the program name followed
  by the model kind (``'unigram'`` or ``'ngram'``). The ``'ngram'`` kind needs
  at least two further arguments. When the list is acceptable the function
  returns ``None``; otherwise it prints a usage line and terminates the
  process via ``sys.exit(-1)``.
  """
  is_valid = (
      len(params) > 1
      and params[1] in ('unigram', 'ngram')
      # 'ngram' requires n and λ, i.e. at least four entries in total.
      and not (params[1] == 'ngram' and len(params) < 4)
  )
  if is_valid:
    return

  print(f'Usage: {params[0]} <unigram|ngram> [n λ [voc_ratio]]')
  sys.exit(-1)


if __name__ == '__main__':
  # Entry point: validate the arguments, then train and evaluate the
  # requested model through run_model, which receives a zero-argument
  # factory and the output path prefix for the results.
  params = sys.argv
  check_params(params)

  model = params[1]
  if model != 'unigram':
    n = int(params[2])
    λ = float(params[3])
    if len(params) != 5:
      # No vocabulary ratio supplied: n-gram model without unknown-word handling.
      run_model(lambda: NgramNoUnk(n, λ), f'results/ngram_n={n}_l={λ}')
    else:
      voc_ratio = float(params[4])
      run_model(lambda: NgramUnk(n, λ, voc_ratio), f'results/ngram_n={n}_l={λ}_voc={voc_ratio}')
  else:
    run_model(lambda: Unigram(), 'results/unigram')