def get_golden_ranking(question):
    """Rank the question's sentences against its first ideal answer with BM25.

    The first entry of ``question.ideal_answer`` is converted with
    ``unicode`` and encoded to ASCII, dropping any character that cannot
    be encoded. That ASCII text is then used as the query for
    ``RM.get_ranked_sentences`` over ``question.sentences``.

    NOTE(review): relies on the Python 2 ``unicode`` builtin -- confirm
    the interpreter version this module targets.

    Returns whatever ``RM.get_ranked_sentences`` returns.
    """
    first_ideal = question.ideal_answer[0]
    # 'ignore' silently discards non-ASCII characters instead of raising.
    ascii_query = unicode(first_ideal).encode("ascii", 'ignore')
    candidates = question.sentences
    return RM.get_ranked_sentences(question_text=ascii_query,
                                   sentences=candidates,
                                   retrieval_algo='BM25')
def get_golden_ranking(question):
    """Rank the question's snippet sentences against its ideal answer with BM25.

    ``question`` is indexed by key: ``question['ideal_answer']`` supplies
    the query text and ``question['snippets']`` is passed through
    ``RM.get_sentences`` and then ``RM.preprocess_sentences`` to obtain
    the candidate sentences.

    Returns whatever ``RM.get_ranked_sentences`` returns.
    """
    reference_answer = question['ideal_answer']
    raw_sentences = RM.get_sentences(question['snippets'])
    candidates = RM.preprocess_sentences(raw_sentences)
    return RM.get_ranked_sentences(question_text=reference_answer,
                                   sentences=candidates,
                                   retrieval_algo='BM25')
def ranked_sentences(self):
    """Return the snippet sentences with their BM25 and Indri scores.

    ``retrieval_model.get_ranked_sentences`` is called twice on
    ``self.question`` / ``self.snippet_sentences``: first with ``BM25``,
    then with ``INDRI``. Each call is iterated as (sentence, score) pairs.

    The result is a list of dicts keyed by ``TEXT``, ``BM25`` and
    ``INDRI``, ordered descending by (BM25 score, Indri score, sentence).

    Every sentence seen in the Indri pass must already have appeared in
    the BM25 pass (otherwise ``KeyError``), and each sentence must end up
    with exactly two scores (otherwise the unpacking raises ``ValueError``).
    """
    score_table = {}

    bm25_ranking = retrieval_model.get_ranked_sentences(
        self.question, self.snippet_sentences, BM25)
    for text, bm25 in bm25_ranking:
        score_table[text] = [bm25]

    indri_ranking = retrieval_model.get_ranked_sentences(
        self.question, self.snippet_sentences, INDRI)
    for text, indri in indri_ranking:
        score_table[text].append(indri)

    # Scores come first in each tuple so the sort is score-driven, with
    # the sentence text only acting as the final tie-breaker.
    rows = []
    for text, (bm25, indri) in score_table.items():
        rows.append((bm25, indri, text))
    rows.sort(reverse=True)

    return [{TEXT: text, BM25: bm25, INDRI: indri}
            for bm25, indri, text in rows]
def create_feature_vectors(question):
    """Build feature vectors for a question (currently a stub).

    The question's sentences are de-duplicated into a set and ranked
    against ``question.question_text`` twice through
    ``RM.get_ranked_sentences``, once with 'BM25' and once with 'Indri'.
    The rankings are not consumed yet, so the returned list is always empty.
    """
    unique_sentences = set(question.sentences)
    feature_vectors = []

    # Both rankings also carry a score that can serve as a feature.
    bm25_ranking = RM.get_ranked_sentences(
        question_text=question.question_text,
        sentences=unique_sentences,
        retrieval_algo='BM25')
    indri_ranking = RM.get_ranked_sentences(
        question_text=question.question_text,
        sentences=unique_sentences,
        retrieval_algo='Indri')

    # TODO(Gabe): feed the rankings above into learning-to-rank (LeToR).
    return feature_vectors