def load_gre_sentence(sentence_path, lsi, word2id, algorithm=TOTAL_SIMILARITY):
    """Load all GRE sentence-completion tasks and select answers via LSA.

    The task file contains groups of 5 candidate lines separated by blank
    lines.  Each candidate line embeds its option(s) in parentheses, e.g.
    ``The cat (sat) on the mat``.  For every completed group of 5 the
    highest-scoring candidate is chosen and appended to the module-level
    ``cal_ans`` list.

    :param sentence_path: path to the GRE task file
    :param lsi: trained LSI/LSA model used for similarity scoring
    :param word2id: mapping from word to model vocabulary id
    :param algorithm: one of the TOTAL_SIMILARITY* strategy constants
    """
    task_score = []
    task_count = 0
    # ``with`` guarantees the task file is closed even if scoring raises;
    # the original left the handle open on any exception.
    with open(sentence_path) as task_file:
        for line in task_file:
            if line.strip() == "":
                # A blank line ends a task; score it once all 5 candidates
                # have been collected.
                if len(task_score) == 5:
                    max_score, index = mathutils.max(task_score)
                    canswer = index2answer(index)
                    print('score : ' + str(max_score) + ' answer: ' + canswer)
                    cal_ans.append(canswer)
                    task_score = []
                    task_count += 1
                continue
            # Split the candidate line: ``sentence`` is the line with the
            # parenthesised option(s) removed; ``text`` additionally keeps
            # the '(' markers (but neither ')' nor the option characters) —
            # that exact shape is what the RAKE scorer below consumes.
            text = ""
            sentence = ""
            open_idx = -1
            options = []
            in_parens = False
            for i in range(len(line)):
                ch = line[i]
                if ch == '(':
                    open_idx = i
                    text += ch
                    in_parens = True
                elif ch == ')':
                    options.append(line[open_idx + 1:i])
                    in_parens = False
                elif not in_parens:
                    sentence += ch
                    text += ch
            context = nltk.word_tokenize(sentence)
            blanks = [nltk.word_tokenize(blank) for blank in options]
            # Dispatch to the selected scoring strategy.
            if algorithm == TOTAL_SIMILARITY:
                task_score.append(calculate_total_similarity(
                    lsi, word2id=word2id, blanks=blanks, context=context))
            elif algorithm == TOTAL_SIMILARITY_WITH_COMBINATION:
                task_score.append(calculate_total_similarity_with_combination(
                    lsi, word2id=word2id, blanks=blanks, context=context))
            elif algorithm == TOTAL_SIMILARITY_K_MAX:
                task_score.append(calculate_total_similarity_by_k_max(
                    lsi, word2id=word2id, blanks=blanks, context=context))
            elif algorithm == TOTAL_SIMILARITY_WITH_RAKE:
                task_score.append(calculate_total_similarity_with_rake(
                    lsi, text, word2id, blanks))
    print(task_count)
def load_gre_sentence(sentence_path, lsi, word2id, algorithm=TOTAL_SIMILARITY,
                      k=11,
                      score_path='/Users/junchen/Documents/CSCI544/project/wiki_data/w2v_score.txt'):
    """Load all GRE sentence-completion tasks and select answers via LSA.

    NOTE(review): this redefines ``load_gre_sentence`` — a function of the
    same name is defined earlier in this file and is shadowed by this one;
    consider renaming one of them.

    Behaves like the earlier variant but additionally writes per-candidate
    normalized scores to *score_path* and supports a ``k`` parameter for the
    k-max and RAKE strategies.  Chosen answers go into the module-level
    ``cal_ans`` list: replaced in place when ``cal_ans`` is already full
    (same length as the module-level ``answer`` list), appended otherwise.

    :param sentence_path: path to the GRE task file
    :param lsi: trained LSI/LSA model used for similarity scoring
    :param word2id: mapping from word to model vocabulary id
    :param algorithm: one of the scoring strategy constants
    :param k: number of top similarities used by the k-max / RAKE scorers
    :param score_path: where the normalized score dump is written
        (default preserves the original hard-coded location)
    """
    task_score = []
    task_count = 0
    # ``with`` closes both files even on error; the original leaked the
    # score file whenever scoring raised before the explicit close().
    with open(sentence_path) as task_file, open(score_path, 'w') as score_file:
        for line in task_file:
            if line.strip() == "":
                # A blank line ends a task; score it once all 5 candidates
                # have been collected.
                if len(task_score) == 5:
                    # Shift scores by +1.0 (presumably to make cosine
                    # similarities non-negative — TODO confirm) and write
                    # each candidate's share of the total, one per line,
                    # followed by a blank separator line.
                    total = 0.0
                    for t in task_score:
                        total += t + 1.0
                    for t in task_score:
                        score_file.write(str((t + 1.0) / total) + '\r\n')
                    score_file.write('\r\n')
                    max_score, index = mathutils.max(task_score)
                    canswer = index2answer(index)
                    # Overwrite in place on a re-run, append on the first pass.
                    if len(cal_ans) == len(answer):
                        cal_ans[task_count] = canswer
                    else:
                        cal_ans.append(canswer)
                    task_score = []
                    task_count += 1
                continue
            # Split the candidate line: ``sentence`` is the line with the
            # parenthesised option(s) removed; ``text`` additionally keeps
            # the '(' markers (but neither ')' nor the option characters) —
            # that exact shape is what the RAKE scorer below consumes.
            text = ""
            sentence = ""
            open_idx = -1
            options = []
            in_parens = False
            for i in range(len(line)):
                ch = line[i]
                if ch == '(':
                    open_idx = i
                    text += ch
                    in_parens = True
                elif ch == ')':
                    options.append(line[open_idx + 1:i])
                    in_parens = False
                elif not in_parens:
                    sentence += ch
                    text += ch
            context = nltk.word_tokenize(sentence)
            blanks = [nltk.word_tokenize(blank) for blank in options]
            # Dispatch to the selected scoring strategy.
            if algorithm == TOTAL_SIMILARITY:
                task_score.append(calculate_total_similarity(
                    lsi, word2id=word2id, blanks=blanks, context=context))
            elif algorithm == TOTAL_SIMILARITY_WITH_COMBINATION:
                task_score.append(calculate_total_similarity_with_combination(
                    lsi, word2id=word2id, blanks=blanks, context=context))
            elif algorithm == K_MAX_TOTAL_SIMILARITY:
                task_score.append(calculate_total_similarity_by_k_max(
                    lsi, word2id=word2id, blanks=blanks, context=context, k=k))
            elif algorithm == TOTAL_SIMILARITY_WITH_RAKE:
                task_score.append(calculate_total_similarity_with_rake(
                    lsi, text, word2id, blanks, k))
    print(task_count)