def classify_questions(n=1):
    """Bucket question ids by the first n-gram of their question text.

    n : n-gram size used to derive the question type (default 1)
    returns : dict mapping each known question type to the list of
        question ids whose text begins with that n-gram
    """
    # Seed every known type with an empty bucket first so that types
    # with no questions still appear in the result.
    buckets = dict((qtype, []) for qtype in get_question_types(n))
    for entry in read_questions.read_questions_no_answers():
        # entry[0] is the question id, entry[1] the question text
        buckets[firstGram(entry[1], n)].append(entry[0])
    return buckets
def classify_questions(n=1):
    """Group question ids under their question type (the leading n-gram).

    n : size of the leading n-gram that defines a question's type
    returns : dict of question type -> list of question ids
    """
    grouped = {}
    for qtype in get_question_types(n):
        grouped[qtype] = []
    all_questions = read_questions.read_questions_no_answers()
    for record in all_questions:
        # record = (question id, question text); index, don't unpack,
        # since records may carry extra fields
        type_key = firstGram(record[1], n)
        grouped[type_key].append(record[0])
    return grouped
def get_answer(first=380, last=399):
    """The process by which the baseline finds answers from the corpus.

    first : an integer corresponding to the first question id (inclusive) to answer
    last : an integer corresponding to the last question id (inclusive) to answer
    returns : an int list and string list of question id's and answers

    Fixes vs. the original:
      * the answer window start is clamped at 0, so a word found near the
        front of a document no longer produces a negative slice start
        (which silently wrapped the window to the end of the document)
      * floor division (//) for the fallback position keeps the index an
        int under both Python 2 and Python 3
    """
    q_ids = []
    ans_text = []
    # make sure the parameters are good
    if first > last:
        last, first = first, last
    # read in all the questions and keep only the requested id range
    questions = read_questions.read_questions_no_answers()
    questions = [q for q in questions if first <= int(q[0]) <= last]
    for question in questions:
        q_id = int(question[0])
        topdoc = init.get_corpus(q_id)
        # list() so slicing also works where keys() returns a view
        doc_nums = list(topdoc.keys())
        # baseline QA system answer process right here...
        for key in doc_nums[:5]:
            doc_text = topdoc[key].split()
            # find a random word from the question
            qs = question[1].split()
            qword = random.choice(qs)
            # pull out positions in the doc where that word occurs
            positions = [i for i, word in enumerate(doc_text) if word == qword]
            if not positions:
                # no hit: fall back to the middle of the document
                positions = [len(doc_text) // 2]
            pos = random.choice(positions)
            q_ids.append(q_id)
            # clamp so a hit in the first five words keeps the window
            # anchored at the start of the document
            start = max(0, pos - 5)
            ans_text.append(' '.join(doc_text[start:pos + 5]))
    return q_ids, ans_text
def filter_chunks(q_id): answers = chunker.run(q_id) my_answers = [] qList = read_questions.read_questions_no_answers() qDict = {} b = 0 for q in qList: for q2 in q: if b == 0: qN = q2 b = 1 else: qDict[qN] = q2 b = 0 for key in qDict: if key == str(q_id): for ansCandidate in answers: print ansCandidate passed = pos.pos_test(qDict[key], ansCandidate) if (passed != 0): my_answers.append(ansCandidate) return my_answers
def filter_chunks(q_id): answers = chunker.run(q_id) my_answers = [] qList = read_questions.read_questions_no_answers() qDict = {} b = 0 for q in qList: for q2 in q: if b == 0: qN = q2 b = 1 else: qDict[qN] = q2 b = 0 for key in qDict: if key == str(q_id): for ansCandidate in answers: print ansCandidate passed = pos.pos_test(qDict[key], ansCandidate) if (passed !=0): my_answers.append(ansCandidate) return my_answers
def get_question_types(n=1):
    """Return the distinct question types, in first-seen order.

    A question's type is the leading n-gram of its text (unigrams by
    default). The grams are used as dict keys elsewhere in this module,
    so they are hashable and a set gives O(1) membership tests instead
    of the original O(n) list scan per question.

    n : n-gram size (default 1)
    returns : list of unique firstGram values over all questions
    """
    questions = read_questions.read_questions_no_answers()
    types = []
    seen = set()
    for question in questions:
        a = firstGram(question[1], n)
        if a not in seen:
            seen.add(a)
            types.append(a)
    return types
def get_question_types(n=1):
    """Collect the distinct leading n-grams (question types) across all
    questions, preserving first-seen order.

    n : n-gram size; unigrams by default
    returns : list of unique question types
    """
    distinct = []
    for entry in read_questions.read_questions_no_answers():
        gram = firstGram(entry[1], n)
        if gram not in distinct:
            distinct.append(gram)
    return distinct
def rewriteQuestionsList():
    """Rewrite the questions and present them as a list of [id, text]
    pairs, mirroring the layout read_questions produces.

    returns : list of two-element lists [question id, question text]
    """
    # Delegate to the dictionary version, then flatten each mapping
    # entry back into a [key, value] pair.
    qDict = rewriteQuestionsDict(read_questions.read_questions_no_answers())
    return [[key, qDict[key]] for key in qDict]
result = 1 if features == "VP": result = 0.1 return result if __name__ == "__main__": testQ = 213 answers = run(testQ) #print answers #for ansCandidate in answers: #print ansCandidate[0], ansCandidate[3] # Again I stick the questions into a dictionary qList = read_questions.read_questions_no_answers() qDict = {} b = 0 for q in qList: for q2 in q: if b == 0: qN = q2 b = 1 else: qDict[qN] = q2 b = 0 for key in qDict: #print key, qDict[key] # I'm just going to pick one question to debug if key == str(testQ):
#!/usr/bin/env python #import sys #sys.path.append('./modules') #import monte import chunker import numpy as np import mlpy import random #Just for generating fake data import tom import check_answers import read_questions questionDict = dict(read_questions.read_questions_no_answers()) def run_evaluators(candidates, evaluators): #candidate = list of the question-candidate indexes confidence = [] for candidate in candidates: # print candidate[0] candidateConfidence = [] for evaluator in evaluators: foo = evaluator(questionDict[str(candidate[4])], candidate) candidateConfidence = candidateConfidence + list(foo) confidence.append(candidateConfidence) return confidence def train(model, correctness, features): ''' Train a model for classifying answers as correct or not
#!/usr/bin/env python #import sys #sys.path.append('./modules') #import monte import chunker import numpy as np import mlpy import random #Just for generating fake data import tom import check_answers import read_questions questionDict=dict(read_questions.read_questions_no_answers()) def run_evaluators(candidates,evaluators): #candidate = list of the question-candidate indexes confidence = [] for candidate in candidates: # print candidate[0] candidateConfidence=[] for evaluator in evaluators: foo=evaluator(questionDict[str(candidate[4])],candidate) candidateConfidence=candidateConfidence+list(foo) confidence.append(candidateConfidence) return confidence def train(model,correctness,features): ''' Train a model for classifying answers as correct or not based on the confidence measures from various QA methods. This should support paramaters at some point.