def __init__(self, beta=10.0):
    """Set the sampling parameters and load the pronunciation resources.

    Args:
        beta: Value stored unchanged on ``self.beta``.
    """
    # Plain configuration values first; they depend on nothing else.
    self.beta = beta
    # Cutoff currently in use (1.25 was tried earlier).
    self.min_cutoff = 1.50

    # load_CMU() yields two objects, kept as ``CMU`` and ``CMUX``.
    cmu_tables = load_CMU()
    self.CMU, self.CMUX = cmu_tables
    self.A = ARPAstat(1.0)
class homophonic_word_translate(object):
    """Callable that replaces a word with a randomly chosen close-sounding one.

    Calling an instance with a word looks the word up in ``self.CMU``,
    compares its entry element-by-element (via ``self.A.delta``) against the
    entries of every candidate in ``self.CMUX[len(entry)]`` that is also in
    ``VALID_WORDS``, and samples one of the candidates whose mean distance is
    below ``self.min_cutoff``.  Candidates with a smaller distance are more
    likely: the sampling weight is ``exp(-distance * beta)``.

    NOTE(review): ``CMU`` is presumably a word -> phoneme-sequence mapping and
    ``CMUX`` a grouping of words by phoneme count -- confirm against
    ``load_CMU``.
    """

    def __init__(self, beta=10.0):
        """Load the pronunciation resources and store the sampling parameters.

        Args:
            beta: Multiplier applied to the negated distances before they are
                exponentiated into sampling weights.
        """
        self.CMU, self.CMUX = load_CMU()
        self.A = ARPAstat(1.0)
        # Candidates whose mean distance reaches this value are rejected
        # (1.25 was tried earlier).
        self.min_cutoff = 1.50
        self.beta = beta

    def __call__(self, w1):
        """Return a randomly sampled close match for ``w1``.

        Args:
            w1: The word to replace.

        Returns:
            ``w1`` itself when it is not in ``self.CMU`` or when no candidate
            is below the cutoff; otherwise one of the candidate words, drawn
            with probability proportional to ``exp(-distance * beta)``.
        """
        if w1 not in self.CMU:
            return w1

        c1 = self.CMU[w1]
        # Float divisor so the mean is a true division even on Python 2.
        n_symbols = float(len(c1))

        close = {}
        for w2 in self.CMUX[len(c1)]:
            # Skip the word itself before doing any distance work.
            if w2 == w1 or w2 not in VALID_WORDS:
                continue
            c2 = self.CMU[w2]
            dx = sum(self.A.delta(x, y) for x, y in zip(c1, c2)) / n_symbols
            if dx < self.min_cutoff:
                close[w2] = dx

        # No match was found, return the original word.
        if not close:
            return w1

        # A real list is required: on Python 3 ``dict.keys()`` is a view that
        # np.random.choice cannot sample from.
        words = list(close)
        scaled = -np.array([close[word] for word in words]) * self.beta
        # Shifting by the maximum leaves the normalised probabilities
        # unchanged but keeps the largest weight at exp(0) == 1, so the
        # weights cannot all underflow to zero for large ``beta``.
        weights = np.exp(scaled - scaled.max())
        prob = weights / weights.sum()

        # Sample an index so the original string object is returned rather
        # than a NumPy scalar.
        return words[np.random.choice(len(words), p=prob)]