Ejemplo n.º 1
0
    def __init__(self, beta=10.0):
        """Store the sampling parameters and load the lookup tables.

        Args:
            beta: Stored unchanged as ``self.beta``; how it is used is not
                visible in this method.
        """
        # Plain settings first; they do not depend on the loaded tables.
        self.beta = beta
        # 1.25 is the previous value, kept in the original as a
        # commented-out assignment.
        self.min_cutoff = 1.50

        # load_CMU() returns a pair that is unpacked into the two tables.
        cmu_tables = load_CMU()
        self.CMU, self.CMUX = cmu_tables
        self.A = ARPAstat(1.0)
Ejemplo n.º 2
0
class homophonic_word_translate(object):
    """Callable that swaps a word for a randomly chosen close-sounding word.

    Candidates are taken from ``self.CMUX[len(pronunciation)]``, filtered by
    ``VALID_WORDS``, and scored by the mean of ``self.A.delta`` over aligned
    symbol pairs. One candidate below ``min_cutoff`` is then drawn at random
    with probability proportional to ``exp(-beta * score)``.

    NOTE(review): ``CMU`` presumably maps a word to its phoneme sequence and
    ``CMUX`` maps a sequence length to the words of that length -- confirm
    against ``load_CMU``.
    """

    def __init__(self, beta=10.0):
        """Load the lookup tables and store the sampling parameters.

        Args:
            beta: Multiplier applied to candidate scores before
                exponentiation; larger values concentrate the draw on the
                lowest-scoring candidates.
        """
        self.CMU, self.CMUX = load_CMU()
        self.A = ARPAstat(1.0)

        # Candidates whose mean per-symbol score is at or above this value
        # are rejected (1.25 was the previous setting).
        self.min_cutoff = 1.50
        self.beta = beta

    def __call__(self, w1):
        """Return a randomly selected close match for ``w1``.

        Args:
            w1: The word to replace.

        Returns:
            One of the accepted candidate words, or ``w1`` itself when it is
            not in ``self.CMU``, has an empty entry there, or has no
            candidate scoring below ``self.min_cutoff``.
        """
        if w1 not in self.CMU:
            return w1

        c1 = self.CMU[w1]
        # An empty entry has nothing to compare and would make the mean
        # below divide by zero.
        if not c1:
            return w1
        n_symbols = len(c1)

        close = {}
        for w2 in self.CMUX[n_symbols]:
            if w2 == w1 or w2 not in VALID_WORDS:
                continue
            c2 = self.CMU[w2]
            dx = sum(self.A.delta(x, y) for x, y in zip(c1, c2)) / n_symbols
            if dx < self.min_cutoff:
                close[w2] = dx

        # No match was found, return the original word.
        if not close:
            return w1

        # Materialise the keys: on Python 3 ``dict.keys()`` is a view, which
        # ``np.random.choice`` rejects as a population.
        words = list(close)
        scores = np.array([close[word] for word in words], dtype=float)

        # Shifting by the minimum leaves the normalised probabilities
        # unchanged but keeps at least one weight equal to 1, so a large
        # ``beta`` cannot underflow every weight to zero (0/0 -> NaN).
        weights = np.exp(-self.beta * (scores - scores.min()))
        prob = weights / weights.sum()

        # Draw an index so the returned value is the original word object
        # rather than a NumPy scalar.
        return words[np.random.choice(len(words), p=prob)]