def test(self, model):
    """Score every candidate partition against ``model`` and keep the winner.

    Candidate partitions come from
    ``util.generatePossiblePartitions(self.word, self.pronunciation)``.  Each
    partition is iterated as ``(k, f)`` pairs; its belief is the product of
    ``model[k].prob(f)`` over the pairs whose ``k`` is present in ``model``.

    Args:
        model: Container supporting ``in`` and indexing by ``k``; each entry
            must provide a ``prob(f)`` method.

    Sets:
        self.model, self.partitions, self.beliefs, self.bestPartition,
        self.confidence and self.correctAnswer.

    Raises:
        IndexError: If no candidate partition is produced.
    """
    self.model = model
    self.beliefs = []
    self.partitions = util.generatePossiblePartitions(self.word, self.pronunciation)

    for partition in self.partitions:
        likelihood = 1.0
        for key, reading in partition:
            # Pairs whose key is unknown to the model leave the product as is.
            if key not in model:
                continue
            likelihood *= model[key].prob(reading)
        self.beliefs.append(likelihood)

    # NOTE(review): the return value is discarded, so normalize_vector
    # presumably rescales the list in place -- confirm in util.
    util.normalize_vector(self.beliefs)

    heuristics = util.omegaHeuristics(self.partitions)
    # Rank descending on (belief, heuristic, index): equal beliefs fall back to
    # the larger heuristic value, then to the larger partition index.
    ranking = [
        (belief, heuristics[position], position)
        for position, belief in enumerate(self.beliefs)
    ]
    ranking.sort(reverse=True)

    winner = ranking[0]
    self.bestPartition = self.partitions[winner[2]]

    # A lone candidate gets the fixed confidence 10.0; otherwise confidence is
    # the belief margin between the two best candidates.
    self.confidence = 10.0 if len(ranking) == 1 else winner[0] - ranking[1][0]

    # The answer counts as correct only if every pair of the chosen partition
    # agrees with self.mapping; scanning stops at the first disagreement.
    self.correctAnswer = True
    if any(self.mapping[key] != reading for key, reading in self.bestPartition):
        self.correctAnswer = False
def updateWeightVectorOmega(self, smoothing=0.5):
    """Recompute ``self.omegas`` and select ``self.bestPartition``.

    For each partition in ``self.partitions`` the weight is the product, over
    its ``(k, f)`` pairs, of ``self._verticesMap[k].prob(f) + smoothing``.
    The weights are then passed to ``util.normalize_vector`` and the partition
    with the largest weight becomes ``self.bestPartition``.

    Args:
        smoothing: Constant added to every probability before multiplying.

    Raises:
        ValueError: If ``self.partitions`` is empty (``max`` of an empty list).
    """
    self.omegas = []
    for partition in self.partitions:
        weight = 1.0
        for key, reading in partition:
            vertex = self._verticesMap[key]
            weight *= vertex.prob(reading) + smoothing
        self.omegas.append(weight)

    # NOTE(review): the return value is discarded, so normalize_vector
    # presumably rescales the list in place -- confirm in util.
    util.normalize_vector(self.omegas)

    # Pick the most probable partition.  Comparing (omega, index) tuples means
    # that equal omegas are resolved in favour of the larger index.
    scored = [(omega, position) for position, omega in enumerate(self.omegas)]
    best_index = max(scored)[1]
    self.bestPartition = self.partitions[best_index]
def updateWeightVectorAlpha(self):
    """Recompute ``self.alphas``, one weight per entry of ``self.factors``.

    Every factor proposes the furiganas returned by
    ``factor.mostProbableFuriganas(self.kanji)``.  A factor's weight is the sum
    of ``1 / (count + 1)`` over its own furiganas, where ``count`` is the
    number of times that furigana was proposed across all factors, so a
    furigana proposed many times contributes less than a rarely proposed one.
    The weights are finally passed to ``util.normalize_vector``.
    """
    self.alphas = []

    # Pool the proposals of all factors to count how often each one occurs.
    pooled = [
        furigana
        for factor in self.factors
        for furigana in factor.mostProbableFuriganas(self.kanji)
    ]
    occurrences = Counter(pooled)

    for factor in self.factors:
        weight = 0.0
        # Accumulate explicitly so the float additions happen strictly left
        # to right.
        for furigana in factor.mostProbableFuriganas(self.kanji):
            weight += 1.0 / (occurrences[furigana] + 1)
        self.alphas.append(weight)

    # NOTE(review): the return value is discarded, so normalize_vector
    # presumably rescales the list in place -- confirm in util.
    util.normalize_vector(self.alphas)
def baseline_test(self):
    """Evaluate the heuristic-only baseline and record its outcome.

    Candidate partitions come from
    ``util.generatePossiblePartitions(self.word, self.pronunciation)`` and the
    beliefs are the values returned by ``util.omegaHeuristics`` for those
    partitions, passed through ``util.normalize_vector``.  No model is used.

    Sets:
        self.partitions, self.beliefs, self.bestPartition, self.confidence and
        self.correctAnswer.

    Raises:
        IndexError: If no candidate partition is produced.
    """
    self.partitions = util.generatePossiblePartitions(self.word, self.pronunciation)
    self.beliefs = util.omegaHeuristics(self.partitions)
    # NOTE(review): the return value is discarded, so normalize_vector
    # presumably rescales the list in place -- confirm in util.
    util.normalize_vector(self.beliefs)

    # Rank descending on (belief, index): equal beliefs are resolved in favour
    # of the larger partition index.
    ranking = [(belief, position) for position, belief in enumerate(self.beliefs)]
    ranking.sort(reverse=True)

    winner = ranking[0]
    self.bestPartition = self.partitions[winner[1]]

    # A lone candidate gets the fixed confidence 10.0; otherwise confidence is
    # the belief margin between the two best candidates.
    self.confidence = 10.0 if len(ranking) == 1 else winner[0] - ranking[1][0]

    # The answer counts as correct only if every pair of the chosen partition
    # agrees with self.mapping; scanning stops at the first disagreement.
    self.correctAnswer = True
    if any(self.mapping[key] != reading for key, reading in self.bestPartition):
        self.correctAnswer = False