Ejemplo n.º 1
0
def emvowel1(tokens, prev=sentence_token):
    """Pick the highest-scoring tuple of words for ``tokens``.

    Every candidate returned by ``all_emvowelings`` for the first token is
    extended with the recursive result for the remaining tokens, and the
    extended tuples are ranked by ``pdist.product`` over ``cPw(word, before)``
    for each adjacent pair yielded by ``bigrams`` (with ``prev`` prepended).

    Args:
        tokens: Sequence supporting indexing and slicing.
        prev: Word placed before the result when scoring; defaults to
            ``sentence_token``.

    Returns:
        A tuple with one chosen word per token, or ``()`` when ``tokens``
        is empty.
    """
    if not tokens:
        return ()
    head, tail = tokens[0], tokens[1:]

    def likelihood(sequence):
        # NOTE(review): cPw(word, before) is presumably the conditional
        # probability of `word` following `before` -- confirm in pdist.
        return pdist.product(cPw(word, before)
                             for before, word in bigrams((prev,) + sequence))

    completions = ((choice,) + emvowel1(tail, choice)
                   for choice in all_emvowelings(head))
    return max(completions, key=likelihood)
Ejemplo n.º 2
0
 def ev(tokens, prev):
     """Return the best ``(score, words)`` pair for ``tokens`` following ``prev``.

     The score of a word sequence is the sum of ``log10(cPw(word, before))``
     over its words, each taken with the word just before it. With no tokens
     the result is ``(0, ())``.

     Candidates are compared as whole ``(score, words)`` tuples, so equal
     scores are ordered by the word tuples themselves.
     """
     if not tokens:
         return 0, ()
     head, tail = tokens[0], tokens[1:]
     scored = []
     for candidate in all_emvowelings(head):
         # Resolve the tail first, then add this candidate's own term.
         tail_logp, tail_words = ev(tail, candidate)
         scored.append((log10(cPw(candidate, prev)) + tail_logp,
                        (candidate,) + tail_words))
     return max(scored)
Ejemplo n.º 3
0
def emvowel1(tokens, prev=sentence_token):
    """Choose one word per token so that the whole sequence scores highest.

    For the first token, each option from ``all_emvowelings`` is joined with
    the recursively chosen words for the rest of ``tokens``. The joined tuple
    whose adjacent pairs (starting from ``prev``) give the largest
    ``pdist.product`` of ``cPw(later, earlier)`` values is returned.

    Args:
        tokens: Sequence supporting indexing and slicing.
        prev: Word treated as preceding the result; defaults to
            ``sentence_token``.

    Returns:
        Tuple of chosen words, ``()`` for empty ``tokens``.
    """
    if not tokens:
        return ()
    leading, remaining = tokens[0], tokens[1:]

    def completions():
        # One full candidate tuple per option for the leading token.
        for option in all_emvowelings(leading):
            yield (option,) + emvowel1(remaining, option)

    def chain_score(sequence):
        pairs = bigrams((prev,) + sequence)
        return pdist.product(cPw(later, earlier) for earlier, later in pairs)

    return max(completions(), key=chain_score)
Ejemplo n.º 4
0
def greedy_emvowel2(tokens):
    """Choose a word for each token, one at a time, left to right.

    For every token the candidate from ``all_emvowelings`` with the largest
    ``pdist.cPw(candidate, previous_choice)`` is kept; the first token is
    scored against ``sentence_token``. Earlier choices are never revisited.

    Returns:
        A list with the chosen word for each token, in order.
    """
    chosen = []
    context = sentence_token
    for token in tokens:
        options = all_emvowelings(token)
        # Bind the current context as a default so the key function is
        # self-contained for this iteration.
        context = max(options,
                      key=lambda option, before=context: pdist.cPw(option, before))
        chosen.append(context)
    return chosen
Ejemplo n.º 5
0
def bigram_prob(words):
    """Return ``product`` of ``pdist.cPw(word, prev)`` over ``words``.

    Each word is paired with the word before it; the first word is paired
    with the marker ``'<S>'``. ``words`` must be a list, because it is
    concatenated onto a list literal.
    """
    predecessors = ['<S>'] + words
    return product(pdist.cPw(current, before)
                   for before, current in zip(predecessors, words))
Ejemplo n.º 6
0
def score(word, prev):
    """Return a numeric score for ``word`` given the preceding word ``prev``.

    Words that do not start with an alphabetic character (including the
    empty string) get the constant 0.3. Otherwise the score is half of
    ``-log10(pdist.cPw(word.lower(), prev.lower()))`` divided by
    ``2.0 + len(word)``.

    Args:
        word: The word to score.
        prev: The word before it; lower-cased before the lookup.

    Returns:
        A float.
    """
    # Slice rather than index: "" has no first character, and "".isalpha()
    # is False, so an empty word takes this branch instead of raising
    # IndexError.
    if not word[:1].isalpha():
        return 0.3
    neg_logp = -log10(pdist.cPw(word.lower(), prev.lower()))
    # The 0.0 offset and 2.0 length padding are kept exactly as written.
    return (0.5 * neg_logp + 0.0) / (2.0 + len(word))
Ejemplo n.º 7
0
def score(word, prev):
    """Score ``word`` in the context of the previous word ``prev``.

    A word whose first character is not alphabetic, or that is empty,
    scores a flat 0.3. Any other word scores
    ``(0.5 * v + 0.0) / (2.0 + len(word))`` where ``v`` is
    ``-log10(pdist.cPw(word.lower(), prev.lower()))``.

    Args:
        word: The word being scored.
        prev: The preceding word; compared in lower case.

    Returns:
        A float.
    """
    # word[:1] is "" for an empty word and "".isalpha() is False, so empty
    # input returns the flat score rather than failing on word[0].
    if word[:1].isalpha():
        v = -log10(pdist.cPw(word.lower(), prev.lower()))
        return (0.5 * v + 0.0) / (2.0 + len(word))
    return 0.3
Ejemplo n.º 8
0
def emvoweling(tokens):
    """Start of a scored search over word sequences for ``tokens``.

    For empty ``tokens`` this returns a single state:
    ``[(0.0, [sentence_token])]``, i.e. a log-score of 0.0 paired with a
    word list holding only ``sentence_token``.

    NOTE(review): only the empty-input return and the ``extend`` helper are
    present in this block; as written, non-empty input defines the helper
    and then falls through, returning None. Confirm against the full
    original before relying on it.
    """
    if not tokens:
        return [(0.0, [sentence_token])]

    def extend(state, word):
        """Return ``state`` advanced by ``word`` as a new (logprob, words) pair."""
        # Unpack in the body: a tuple pattern in the parameter list is
        # Python 2-only syntax (removed by PEP 3113). Callers still pass the
        # (logprob, words) pair as the first positional argument.
        logprob, words = state
        return (logprob + math.log10(pdist.cPw(word, words[-1])),
                words + [word])
Ejemplo n.º 9
0
 def ev(tokens, prev):
     """Find the maximum ``(log-score, words)`` pair for ``tokens`` after ``prev``.

     Each option from ``all_emvowelings`` for the first token contributes
     ``log10(cPw(option, prev))``, added to the best score for the remaining
     tokens given that option. Empty ``tokens`` yields ``(0, ())``.

     The maximum is taken over plain tuples, so pairs with equal scores are
     ordered by their word tuples.
     """
     if not tokens:
         return 0, ()
     leading, remaining = tokens[0], tokens[1:]

     def scored_sequences():
         for option in all_emvowelings(leading):
             # The remainder is evaluated before this option's own term.
             remaining_logp, remaining_words = ev(remaining, option)
             yield (log10(cPw(option, prev)) + remaining_logp,
                    (option,) + remaining_words)

     return max(scored_sequences())
Ejemplo n.º 10
0
def bigram_prob(words):
    """Combine ``pdist.cPw(word, prev)`` for every word using ``product``.

    ``prev`` is the word immediately before ``word`` in ``words``, or the
    marker ``'<S>'`` for the first word. ``words`` must be a list because it
    is appended to a list literal.
    """
    padded = ['<S>'] + words
    # padded[1:] is `words` again, so each pair is (previous word, word).
    factors = (pdist.cPw(current, before)
               for before, current in zip(padded, padded[1:]))
    return product(factors)