Example #1
0
#!/usr/bin/env python
import nltk
import init
import chunker


def punc_loc(question, candidate):
    """Report whether the token right after a candidate answer is punctuation.

    question  -- the question text (not used by this feature)
    candidate -- 5-tuple (answer, doc_num, index, features, q_id); index is
                 used as the answer's word offset in the whitespace-split,
                 punctuation-cleaned document

    Returns 1 if the whitespace-delimited token that follows the answer is
    exactly one of  ,  .  "  :  !  and 0 otherwise. Also returns 0 when there
    is no such token (e.g. the answer is the last thing in the document).
    """
    # Unpack here rather than in the signature: tuple parameters are a
    # SyntaxError on Python 3 (PEP 3113). Callers still pass the same tuple.
    answer, doc_num, index, features, q_id = candidate

    doc_tokens = chunker.clean_punctuation(init.get_doc(doc_num)).split()
    answer_len = len(chunker.clean_punctuation(answer).split())

    # Position of the token immediately after the last word of the answer.
    following_index = index + answer_len
    if not 0 <= following_index < len(doc_tokens):
        # Nothing follows the answer; previously this raised IndexError.
        return 0

    punctuation = (',', '.', '"', ':', '!')
    return 1 if doc_tokens[following_index] in punctuation else 0


#test case below was modified to work with specified doc instead of the actual doc to work with a known index
def test():
Example #2
0
import init
import chunker
import read_questions
import sys

MAX_INT = sys.maxint


def literal_question_distance(question, candidate):
    """Evaluates a candidate based on how close it is to the longest fragment of
    the question in the document

    question  -- the question text handed to find_match
    candidate -- 5-tuple (answer, doc_num, index, features, q_id); only
                 doc_num and index are used here

    returns (distance, length of fragment)

    distance is 0 when the candidate's offset lies between the start and the
    end of the matched fragment (inclusive); otherwise it is the smaller of
    the distances to either end of the fragment.
    """
    # Unpack here rather than in the signature: tuple parameters are a
    # SyntaxError on Python 3 (PEP 3113). Callers still pass the same tuple.
    answer, doc_num, index, features, q_id = candidate

    doc = chunker.clean_punctuation(init.get_doc(doc_num))
    # NOTE(review): start/length are presumably character positions in doc;
    # find_match is defined elsewhere -- confirm.
    start, _, length = find_match(question, doc)

    # Turn the word index into a character offset: the length of the text up
    # to and including that word, with words joined by single spaces.
    words = doc.split()
    char_offset = len(" ".join(words[:index + 1]))

    end = start + length
    inside = 0 if start <= char_offset <= end else MAX_INT
    distance = min(abs(start - char_offset), abs(end - char_offset), inside)
    return (distance, length)


def literal_rewrite_distance(question, candidate):
    """Score a candidate against the re-written form of the question.

    The question is passed through rewriteQuestion and the result is handed,
    together with the unchanged candidate, to literal_question_distance.

    returns (distance, length of fragment)
    """
    rewritten = rewriteQuestion(question)
    return literal_question_distance(rewritten, candidate)
Example #3
0
from difflib import SequenceMatcher as SequenceMatcher
import init
import chunker
import read_questions
import sys

MAX_INT = sys.maxint


def literal_question_distance(question, candidate):
    """Evaluates a candidate based on how close it is to the longest fragment of
    the question in the document

    question  -- the question text handed to find_match
    candidate -- 5-tuple (answer, doc_num, index, features, q_id); only
                 doc_num and index are used here

    returns (distance, length of fragment)

    distance is 0 when the candidate's offset lies between the start and the
    end of the matched fragment (inclusive); otherwise it is the smaller of
    the distances to either end of the fragment.
    """
    # Unpack here rather than in the signature: tuple parameters are a
    # SyntaxError on Python 3 (PEP 3113). Callers still pass the same tuple.
    answer, doc_num, index, features, q_id = candidate

    doc = chunker.clean_punctuation(init.get_doc(doc_num))
    # NOTE(review): start/length are presumably character positions in doc;
    # find_match is defined elsewhere -- confirm.
    start, _, length = find_match(question, doc)

    # Turn the word index into a character offset: the length of the text up
    # to and including that word, with words joined by single spaces.
    words = doc.split()
    char_offset = len(" ".join(words[: index + 1]))

    end = start + length
    inside = 0 if start <= char_offset <= end else MAX_INT
    distance = min(abs(start - char_offset), abs(end - char_offset), inside)
    return (distance, length)


def literal_rewrite_distance(question, candidate):
    """Evaluates a candidate based on how close it is to the longest fragment of
    the re-written question in the document

    returns (distance, length of fragment)
    """
Example #4
0
#!/usr/bin/env python
import nltk
import init
import chunker

def punc_loc(question, candidate):
    """Report whether the token right after a candidate answer is punctuation.

    question  -- the question text (not used by this feature)
    candidate -- 5-tuple (answer, doc_num, index, features, q_id); index is
                 used as the answer's word offset in the whitespace-split,
                 punctuation-cleaned document

    Returns 1 if the whitespace-delimited token that follows the answer is
    exactly one of  ,  .  "  :  !  and 0 otherwise. Also returns 0 when there
    is no such token (e.g. the answer is the last thing in the document).
    """
    # Unpack here rather than in the signature: tuple parameters are a
    # SyntaxError on Python 3 (PEP 3113). Callers still pass the same tuple.
    answer, doc_num, index, features, q_id = candidate

    doc_tokens = chunker.clean_punctuation(init.get_doc(doc_num)).split()
    answer_len = len(chunker.clean_punctuation(answer).split())

    # Position of the token immediately after the last word of the answer.
    following_index = index + answer_len
    if not 0 <= following_index < len(doc_tokens):
        # Nothing follows the answer; previously this raised IndexError.
        return 0

    punctuation = (',', '.', '"', ':', '!')
    return 1 if doc_tokens[following_index] in punctuation else 0
    
#test case below was modified to work with specified doc instead of the actual doc to work with a known index
def test():
	question = 'What was the name, of the first Russian astronaut to do a spacewalk?'
	doc = 'The name of the first Russian astronaut to do a spacewalk is Aleksei A. Leonov!'
	doc_num = "LA072490-0034"