def emvowel1(tokens, prev=sentence_token):
    """Pick the best-scoring sequence of emvoweled words for ``tokens``.

    For every candidate returned by ``all_emvowelings`` for the first token,
    the remaining tokens are resolved recursively with that candidate as the
    new ``prev``. The completed sequences are then ranked by the product of
    ``cPw(v, u)`` over the consecutive pairs ``(u, v)`` of ``(prev,) + words``
    (presumably the conditional probability of ``v`` following ``u`` --
    confirm against ``cPw``).

    Args:
        tokens: Sliceable sequence of tokens still to be resolved.
        prev: The word preceding ``tokens``; defaults to ``sentence_token``.

    Returns:
        A tuple with one chosen word per token; ``()`` when ``tokens`` is
        empty.
    """
    if not tokens:
        return ()

    head = tokens[0]
    tail = tokens[1:]

    def chain_probability(words):
        # Score the whole chain, starting from the word before this span.
        pairs = bigrams((prev,) + words)
        return pdist.product(cPw(v, u) for u, v in pairs)

    # Lazily build one full candidate sequence per emvoweling of the head.
    candidates = ((word,) + emvowel1(tail, word)
                  for word in all_emvowelings(head))
    return max(candidates, key=chain_probability)
def ev(tokens, prev):
    """Return the best ``(log10 score, words)`` pair for ``tokens``.

    Each candidate from ``all_emvowelings`` for the first token is combined
    with the best result for the remaining tokens (computed recursively with
    that candidate as ``prev``). A candidate's score is
    ``log10(cPw(word, prev))`` plus the score of the rest.

    Args:
        tokens: Sliceable sequence of tokens still to be resolved.
        prev: The word preceding ``tokens``.

    Returns:
        A ``(score, words)`` tuple where ``words`` is a tuple with one chosen
        word per token; ``(0, ())`` when ``tokens`` is empty. Candidates are
        compared as whole tuples, so equal scores are ordered by ``words``.
    """
    if not tokens:
        return 0, ()

    head, tail = tokens[0], tokens[1:]

    def scored_candidates():
        for word in all_emvowelings(head):
            # Resolve the remainder first, then add this word's own term.
            tail_logp, tail_words = ev(tail, word)
            yield log10(cPw(word, prev)) + tail_logp, (word,) + tail_words

    return max(scored_candidates())
def emvowel1(tokens, prev=sentence_token):
    """Choose one emvoweled word per token so the whole chain scores highest.

    Every candidate from ``all_emvowelings`` for the leading token is
    completed by a recursive call on the remaining tokens, using that
    candidate as the preceding word. Completed sequences are ranked by
    ``pdist.product`` of ``cPw(v, u)`` over the consecutive pairs ``(u, v)``
    of ``(prev,) + words``.

    Args:
        tokens: Sliceable sequence of tokens still to be resolved.
        prev: The word preceding ``tokens``; defaults to ``sentence_token``.

    Returns:
        A tuple of chosen words, one per token; ``()`` for empty ``tokens``.
    """
    if not tokens:
        return ()

    leading = tokens[0]
    remaining = tokens[1:]

    def complete(word):
        # Fix `word` for the leading token and resolve the rest after it.
        return (word,) + emvowel1(remaining, word)

    def likelihood(words):
        return pdist.product(
            cPw(following, current)
            for current, following in bigrams((prev,) + words))

    return max(map(complete, all_emvowelings(leading)), key=likelihood)
def greedy_emvowel2(tokens):
    """Greedily choose an emvoweled word for each token, left to right.

    For each token, the candidate from ``all_emvowelings`` with the highest
    ``pdist.cPw(candidate, previous)`` is selected, where ``previous`` is the
    word chosen for the preceding token (``sentence_token`` for the first).

    Args:
        tokens: Iterable of tokens to resolve.

    Returns:
        A list of the chosen words, one per token, in input order.
    """
    chosen = []
    previous = sentence_token
    for token in tokens:
        def fit(candidate, context=previous):
            # `context` is bound now, before `previous` is reassigned below.
            return pdist.cPw(candidate, context)

        previous = max(all_emvowelings(token), key=fit)
        chosen.append(previous)
    return chosen
def bigram_prob(words):
    """Return the product of ``pdist.cPw(word, prev)`` over consecutive words.

    The first word is paired with the start marker ``'<S>'``; every later
    word is paired with the word before it.

    Args:
        words: The words to score, in order. Any iterable is accepted: it is
            copied into a list first, so tuples and generators work as well
            as lists (a bare ``['<S>'] + words`` raises ``TypeError`` for
            anything that is not a list).

    Returns:
        Whatever ``product`` yields for the per-word ``pdist.cPw`` values.
    """
    # Normalise to a list so the start marker can be prepended and the input
    # can safely be walked twice by zip().
    words = list(words)
    return product(pdist.cPw(word, prev)
                   for prev, word in zip(['<S>'] + words, words))
def score(word, prev):
    """Return a length-normalised cost for ``word`` following ``prev``.

    Words whose first character is not alphabetic get a flat cost of 0.3.
    Otherwise the cost is half of ``-log10(pdist.cPw(word.lower(),
    prev.lower()))`` divided by ``2.0 + len(word)``. (An earlier variant,
    left commented out in the original, divided by ``len(word)`` alone.)

    Args:
        word: The word to score; must be non-empty because ``word[0]`` is
            inspected.
        prev: The preceding word; only used when ``word`` starts with a
            letter.

    Returns:
        The cost as a float.
    """
    if not word[0].isalpha():
        return 0.3

    neg_logp = -log10(pdist.cPw(word.lower(), prev.lower()))
    numerator = 0.5 * neg_logp + 0.0
    denominator = 2.0 + len(word)
    return numerator / denominator
def emvoweling(tokens):
    """Start building scored emvoweling hypotheses for ``tokens``.

    Args:
        tokens: Sequence of tokens to resolve.

    Returns:
        ``[(0.0, [sentence_token])]`` when ``tokens`` is empty: a single
        hypothesis with log-probability 0.0 holding only the sentence marker.
        For non-empty input nothing is returned by the code visible here
        (see the review note at the end of the body).
    """
    if not tokens:
        return [(0.0, [sentence_token])]

    def extend(state, word):
        """Return the ``(logprob, words)`` hypothesis extended by ``word``."""
        # Explicit unpacking replaces the Python-2-only tuple parameter
        # `def extend((logprob, words), word)`, which is a SyntaxError on
        # Python 3; positional callers are unaffected.
        logprob, words = state
        return (logprob + math.log10(pdist.cPw(word, words[-1])),
                words + [word])

    # NOTE(review): nothing follows `extend` in the visible source, so
    # non-empty input falls through and returns None. The rest of this
    # function looks to have been lost -- confirm and restore it.
def ev(tokens, prev):
    """Find the highest-scoring emvoweling of ``tokens`` with its log score.

    The score of a sequence is the sum of ``log10(cPw(word, prev))`` terms,
    one per chosen word, each taken relative to the word before it. Every
    candidate from ``all_emvowelings`` for the first token is tried, with the
    rest of the tokens resolved recursively.

    Args:
        tokens: Sliceable sequence of tokens still to be resolved.
        prev: The word preceding ``tokens``.

    Returns:
        ``(score, words)`` with ``words`` a tuple of chosen words;
        ``(0, ())`` when ``tokens`` is empty. The maximum is taken over the
        whole tuple, so ties on score fall back to comparing ``words``.
    """
    if not tokens:
        return 0, ()

    first_token = tokens[0]
    rest_tokens = tokens[1:]

    def with_word(word):
        # Best continuation after `word`, then this word's own contribution.
        rest_logp, rest_words = ev(rest_tokens, word)
        return log10(cPw(word, prev)) + rest_logp, (word,) + rest_words

    return max(map(with_word, all_emvowelings(first_token)))
def bigram_prob(words):
    """Multiply ``pdist.cPw(word, prev)`` across ``words`` via ``product``.

    Each word is scored against its predecessor; the first word's
    predecessor is the start marker ``'<S>'``.

    Args:
        words: The words to score, in order. Lists, tuples and other
            iterables are all accepted because the input is copied into a
            list before the start marker is prepended (concatenating a list
            with a non-list would otherwise raise ``TypeError``).

    Returns:
        The result of ``product`` over the per-word ``pdist.cPw`` values.
    """
    # Copy once: allows `['<S>'] + words` for any iterable and lets zip()
    # traverse the words twice even if a generator was passed in.
    words = list(words)
    predecessors = ['<S>'] + words
    return product(pdist.cPw(word, prev)
                   for prev, word in zip(predecessors, words))