Example #1
def classify_questions(n=1):
	questions={}
	for t in get_question_types(n):
		questions[t]=[]
	for question in read_questions.read_questions_no_answers():
		questions[firstGram(question[1],n)].append(question[0])
	return questions
Example #2
def classify_questions(n=1):
    questions = {}
    for t in get_question_types(n):
        questions[t] = []
    for question in read_questions.read_questions_no_answers():
        questions[firstGram(question[1], n)].append(question[0])
    return questions
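Both versions assume that read_questions.read_questions_no_answers() yields [question_id, question_text] pairs (the indexing of question[0]/question[1] here, and the dict(...) call in Example #10, both point to that shape). Below is a minimal, self-contained sketch of the same classification idea, with hypothetical stand-ins for read_questions_no_answers and firstGram, since neither is shown in these snippets:

# Hypothetical stand-ins: the real read_questions module and firstGram helper are
# not part of these snippets, so the data shape and n-gram logic are assumptions.
def read_questions_no_answers():
    # assumed shape: a list of [question_id, question_text] pairs
    return [["201", "Who invented the telephone ?"],
            ["202", "Where is the Taj Mahal ?"],
            ["203", "Who wrote Hamlet ?"]]

def firstGram(text, n=1):
    # assumed behavior: the first n whitespace tokens of the question, rejoined
    return " ".join(text.split()[:n])

def get_question_types(n=1):
    return {firstGram(q[1], n) for q in read_questions_no_answers()}

def classify_questions(n=1):
    # one bucket per question type, then group question ids by their first n-gram
    questions = {t: [] for t in get_question_types(n)}
    for q_id, text in read_questions_no_answers():
        questions[firstGram(text, n)].append(q_id)
    return questions

print(classify_questions())  # e.g. {'Who': ['201', '203'], 'Where': ['202']}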
Example #3
def get_answer(first=380, last=399):
    """ the process by which the baseline finds answers from the corpus
        first : an integer corresponding to the first question id (inclusive) to answer
        last : an integer corresponding to the last question id (inclusive) to answer
        returns : an int list and a string list of question ids and answers """
    
    q_ids = []
    ans_text = []

    # make sure the parameters are good
    if first > last: last, first = first, last

    # read in all the questions and iterate through them
    questions = read_questions.read_questions_no_answers()
    questions = [q for q in questions if int(q[0]) >= first and int(q[0]) <= last]
    for question in questions:
        q_id = int(question[0])
        topdoc = init.get_corpus(q_id)
        doc_nums = list(topdoc.keys())
        
        # baseline QA system answer process right here...
        for key in doc_nums[:5]:
            doc_text = topdoc[key].split()
            # find a random word from the question
            qs = question[1].split()
            qword = qs[random.randint(0, len(qs) - 1)]
            # pull out sentences from docs that have that word
            positions = [i for i,x in enumerate(doc_text) if x == qword]
            # get a random position
            if len(positions) == 0: positions = [len(doc_text) / 2]
            pos = positions[random.randint(0, len(positions) - 1)]
            q_ids.append(q_id)
            ans_text.append(' '.join(doc_text[(pos - 5):(pos + 5)]))

    return q_ids, ans_text
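The heart of this baseline is the roughly ten-token window cut from a top document around a randomly chosen question word. A self-contained sketch of just that step (the document text below is invented for illustration; init.get_corpus and the real documents are not shown here):

import random

def answer_window(doc_text, question, width=5):
    # pick a random question word and return ~2*width document tokens around
    # one of its occurrences, mirroring the inner loop of get_answer above
    tokens = doc_text.split()
    qword = random.choice(question.split())
    positions = [i for i, tok in enumerate(tokens) if tok == qword]
    pos = random.choice(positions) if positions else len(tokens) // 2
    return " ".join(tokens[max(0, pos - width):pos + width])

doc = "Alexander Graham Bell is credited with inventing the telephone in 1876"
print(answer_window(doc, "Who invented the telephone"))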
Example #4
def filter_chunks(q_id):
    answers = chunker.run(q_id)
    my_answers = []

    qList = read_questions.read_questions_no_answers()
    qDict = {}
    b = 0
    # fold the [id, text] pairs into a dict: ids become keys, texts become values
    for q in qList:
        for q2 in q:
            if b == 0:
                qN = q2
                b = 1
            else:
                qDict[qN] = q2
                b = 0

    for key in qDict:
        if key == str(q_id):
            for ansCandidate in answers:
                print(ansCandidate)
                passed = pos.pos_test(qDict[key], ansCandidate)
                if passed != 0:
                    my_answers.append(ansCandidate)

    return my_answers
Example #5
def filter_chunks(q_id):
    answers = chunker.run(q_id)
    my_answers = []
    
    qList = read_questions.read_questions_no_answers()
    qDict = {}
    b = 0
    # fold the [id, text] pairs into a dict: ids become keys, texts become values
    for q in qList:
        for q2 in q:
            if b == 0:
                qN = q2
                b = 1
            else:
                qDict[qN] = q2
                b = 0
                
    for key in qDict:
        if key == str(q_id):
            for ansCandidate in answers:
                print(ansCandidate)
                passed = pos.pos_test(qDict[key], ansCandidate)
                if passed != 0:
                    my_answers.append(ansCandidate)
                    
    return my_answers
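The flip-flop loop that builds qDict in both versions of filter_chunks does the same job as handing the [id, text] pairs straight to dict(), which is how Example #10 builds its questionDict. A small equivalence check (the sample pairs are invented):

qList = [["213", "Who shot Abraham Lincoln ?"],
         ["214", "Where is Belize located ?"]]

qDict = {}
b = 0
for q in qList:
    for q2 in q:
        if b == 0:
            qN = q2
            b = 1
        else:
            qDict[qN] = q2
            b = 0

# every pair contributes its first element as the key and its second as the value
assert qDict == dict(qList)
print(qDict)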
Example #6
def get_question_types(n=1):  #Unigrams by default
    questions = read_questions.read_questions_no_answers()
    types = []
    for question in questions:
        a = firstGram(question[1], n)
        if a not in types:
            types.append(a)
    return types
Example #7
def get_question_types(n=1): #Unigrams by default
  questions=read_questions.read_questions_no_answers()
  types=[]
  for question in questions:
    a=firstGram(question[1],n)
    if a not in types:
      types.append(a)	
  return types
Example #8
def rewriteQuestionsList():
    # calls the dictionary version and plugs it into a list of lists (like
    # read_questions is presented)
    qDict = rewriteQuestionsDict(read_questions.read_questions_no_answers())
    questionsList = []
    for key in qDict:
        questions = []
        questions.append(key)
        questions.append(qDict[key])
        questionsList.append(questions)
    return questionsList
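The comment above describes reshaping the dictionary form back into the list-of-lists layout that read_questions produces. A self-contained sketch of that reshaping (the sample dict stands in for whatever rewriteQuestionsDict returns, which is not shown in this snippet):

def dict_to_question_list(qDict):
    # same reshaping as rewriteQuestionsList above: {id: text} -> [[id, text], ...]
    return [[key, value] for key, value in qDict.items()]

sample = {"201": "Who invented the telephone ?", "202": "Where is the Taj Mahal ?"}
print(dict_to_question_list(sample))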
Example #9
File: pos.py Project: wlf2/cs4740_4
        result = 1
    if features == "VP":
        result = 0.1

    return result


if __name__ == "__main__":
    testQ = 213
    answers = run(testQ)
    #print answers
    #for ansCandidate in answers:
        #print ansCandidate[0], ansCandidate[3]

    # Again I stick the questions into a dictionary
    qList = read_questions.read_questions_no_answers()
    qDict = {}
    b = 0
    for q in qList:
        for q2 in q:
            if b == 0:
                qN = q2
                b = 1
            else:
                qDict[qN] = q2
                b = 0

    for key in qDict:
        #print key, qDict[key]
        # I'm just going to pick one question to debug
        if key == str(testQ):
Example #10
#!/usr/bin/env python
#import sys
#sys.path.append('./modules')
#import monte
import chunker
import numpy as np
import mlpy
import random  #Just for generating fake data
import tom
import check_answers
import read_questions

questionDict = dict(read_questions.read_questions_no_answers())


def run_evaluators(candidates, evaluators):
    #candidate = list of the question-candidate indexes
    confidence = []
    for candidate in candidates:
        #		print candidate[0]
        candidateConfidence = []
        for evaluator in evaluators:
            foo = evaluator(questionDict[str(candidate[4])], candidate)
            candidateConfidence = candidateConfidence + list(foo)
        confidence.append(candidateConfidence)
    return confidence


def train(model, correctness, features):
    '''
	Train a model for classifying answers as correct or not
Example #11
File: pos.py Project: wlf2/cs4740_4
        result = 1
    if features == "VP":
        result = 0.1    

    return result


if __name__ == "__main__":
    testQ = 213
    answers = run(testQ)
    #print answers
    #for ansCandidate in answers:
        #print ansCandidate[0], ansCandidate[3]

    # Again I stick the questions into a dictionary
    qList = read_questions.read_questions_no_answers()
    qDict = {}
    b = 0
    for q in qList:
        for q2 in q:
            if b == 0:
                qN = q2
                b = 1
            else:
                qDict[qN] = q2
                b = 0
                
    for key in qDict:
        #print key, qDict[key]
        # I'm just going to pick one question to debug
        if key == str(testQ):
Example #12
#!/usr/bin/env python
#import sys
#sys.path.append('./modules')
#import monte
import chunker
import numpy as np
import mlpy
import random #Just for generating fake data
import tom
import check_answers
import read_questions

questionDict=dict(read_questions.read_questions_no_answers())

def run_evaluators(candidates,evaluators):
	#candidate = list of the question-candidate indexes
	confidence = []
	for candidate in candidates:
#		print candidate[0]
		candidateConfidence=[]
		for evaluator in evaluators:
			foo=evaluator(questionDict[str(candidate[4])],candidate)
			candidateConfidence=candidateConfidence+list(foo)
		confidence.append(candidateConfidence)
	return confidence

def train(model,correctness,features):
	'''
	Train a model for classifying answers as correct or not
	based on the confidence measures from various QA methods.
	This should support parameters at some point.