class SentencePairScore(ScoreFunction):
    """
    This class provides a score of how close two sentences are to being
    translations of each other.

    The score is the raw output of a trained classifier, multiplied by an
    inferred sign and by `SCORE_MULTIPLIER`, then squashed with a logistic
    function.
    """
    # Factor applied to the signed classifier score before the logistic
    # squashing in `__call__`.
    SCORE_MULTIPLIER = 3

    def __init__(self):
        # NOTE(review): (0, 1) are presumably the min/max bounds that
        # `__call__` later checks through `self.min_bound` / `self.max_bound`
        # -- confirm against ScoreFunction.
        super(SentencePairScore, self).__init__(0, 1)
        self.classifier = None  # set by `train`; None means "not ready"
        self.sign = 1  # multiplier (+1 or -1) applied to the raw score

    def train(self, pairs, word_score_function):
        """
        Trains the sentence pair likelihood score using examples.
        `pairs` is an iterable of `SentencePair` instances; each item is
        unpacked into two elements `(a, b)`.
        `word_score_function` is an instance of ScoreFunction, perhaps even an
        instance of `WordPairScore`.

        Raises `ValueError` if no pair yields a non-zero classifier score
        (which includes an empty `pairs`), because then the sign of the score
        cannot be inferred.
        """
        pairs = list(pairs)
        self.problem = SentencePairScoreProblem(word_score_function)
        self.classifier = SVMClassifier(pairs, self.problem)
        # Start from the default orientation on every call; otherwise a sign
        # flipped by an earlier `train` would leak into this new classifier.
        self.sign = 1
        class_ = None
        for a, b in pairs:
            sent = SentencePair(a, b)
            score = self.classifier.score(sent)
            if score == 0:
                # A zero score says nothing about orientation; try the next.
                continue
            class_ = bool(self.classifier.classify(sent)[0])
            # Flip the sign when a positive raw score comes with a truthy
            # class label, or a negative raw score with a falsy one.
            if (score > 0 and class_) or (score < 0 and not class_):
                self.sign = -1
            # The first non-zero score is enough to decide.
            break
        if class_ is None:
            raise ValueError("Cannot infer sign with this data")

    def __call__(self, a, b):
        """
        Returns a score representing how good a translation sentence b is of
        sentence a.

        Raises `LookupError` if no classifier has been set yet.
        """
        if self.classifier is None:
            raise LookupError("Score not trained or loaded yet")
        pair = SentencePair(a, b)
        score = self.classifier.score(pair) * self.sign
        result = self.logistic_function(
            score * SentencePairScore.SCORE_MULTIPLIER)
        # Sanity check only: the logistic output should lie within the bounds.
        assert self.min_bound <= result <= self.max_bound
        return result

    def logistic_function(self, x):
        """
        Returns `1 / (1 + e ** -x)`.
        See: http://en.wikipedia.org/wiki/Logistic_function

        Computed in a numerically stable way: the exponential is only ever
        taken of a non-positive number, so strongly negative `x` yields a
        value near 0 instead of raising `OverflowError`.
        """
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        exp_x = math.exp(x)
        return exp_x / (1 + exp_x)

    @property
    def word_pair_score(self):
        """The `word_pair_score` of the classifier's problem object."""
        return self.classifier.problem.word_pair_score
def train(self, pairs, word_score_function):
    """
    Trains the sentence pair likelihood score using examples.
    `pairs` is an iterable of `SentencePair` instances; each item is
    unpacked into two elements `(a, b)`.
    `word_score_function` is an instance of ScoreFunction, perhaps even an
    instance of `WordPairScore`.

    Raises `ValueError` if no pair yields a non-zero classifier score
    (which includes an empty `pairs`), because then the sign of the score
    cannot be inferred.
    """
    # NOTE(review): this looks like a duplicate of `SentencePairScore.train`
    # -- confirm whether this second copy is intentional.
    pairs = list(pairs)
    self.problem = SentencePairScoreProblem(word_score_function)
    self.classifier = SVMClassifier(pairs, self.problem)
    # Start from the default orientation on every call; otherwise a sign
    # flipped by an earlier `train` would leak into this new classifier.
    self.sign = 1
    class_ = None
    for a, b in pairs:
        sent = SentencePair(a, b)
        score = self.classifier.score(sent)
        if score == 0:
            # A zero score says nothing about orientation; try the next pair.
            continue
        class_ = bool(self.classifier.classify(sent)[0])
        # Flip the sign when a positive raw score comes with a truthy class
        # label, or a negative raw score with a falsy one.
        if (score > 0 and class_) or (score < 0 and not class_):
            self.sign = -1
        # The first non-zero score is enough to decide.
        break
    if class_ is None:
        raise ValueError("Cannot infer sign with this data")