Example #1
def When_module(sent, sent_features):
    question = []
    structures = []
    sNLP = StanfordNLP()

    # Constituency parse of the input sentence
    parse = sNLP.parse(sent)

    # when_parseTraversal fills `question` with candidate question sentences
    # and `structures` with (phrase, score) pairs found in the parse tree
    when_parseTraversal(sent, parse, question, structures)

    prev_min = float('Inf')

    if len(structures) > 0:
        # Keep the candidate phrase with the smallest score, strip it from the
        # sentence, and build the 'When' question from the remaining clause
        whenPhrase = ""
        for t in structures:
            if t[1] < prev_min:
                whenPhrase = t[0]
                prev_min = t[1]
        thisQ = sent.replace(whenPhrase, "")
        dep_tree = sNLP.dependency_parse(thisQ)
        dep_tree = dep_tree.__next__()
        dep_tree_list = list(dep_tree.triples())
        return construct_when(thisQ, dep_tree_list)

    # Otherwise fall back to the first candidate gathered during traversal
    for q in question:
        dep_tree = sNLP.dependency_parse(q)
        dep_tree = dep_tree.__next__()
        dep_tree_list = list(dep_tree.triples())
        return construct_when(q, dep_tree_list)

    return None
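A minimal standalone sketch of the selection step above: among the candidate phrases collected by when_parseTraversal, the module keeps the one with the smallest score and strips it from the sentence. The example sentence and the (phrase, score) pairs below are made up for illustration; the real pairs come from the parse-tree traversal.

sent = "In March 1889, the Eiffel Tower was opened to the public."
structures = [("In March 1889,", 2), ("In March", 3)]  # assumed (phrase, score) format

prev_min = float('Inf')
whenPhrase = ""
for phrase, score in structures:
    if score < prev_min:
        whenPhrase, prev_min = phrase, score

print(sent.replace(whenPhrase, "").strip())
# -> the Eiffel Tower was opened to the public.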
Example #2
class QGPipeline:
    # Sentence simplification and question generation pipeline:
    # parse -> split on conjunctions -> simplify -> generate questions
    def __init__(self):
        self.sNLP = StanfordNLP()
        self.sent_simpl = Simplification()
        self.QG = QuestionGeneration()

    def getParseTree(self, text):
        # Optionally strip parenthetical phrases and newlines before parsing:
        # text = re.sub("\(.*\)", "", text)
        # text = re.sub("\\n", "", text)
        return self.sNLP.parse(text)

    def splitConj(self, t):
        # STEP 1: split the parse tree on conjunctions into simpler clauses
        t_list = []
        t_list = self.sent_simpl.splitConjunctions(t, t_list, None)

        # If nothing was split off, keep the original tree
        if len(t_list) == 0:
            t_list.append(t)

        return t_list

    # STEP 2: simplify the split parent sentences, strip commas, normalise
    # whitespace and make sure each sentence ends with a period
    def simplify_sentence(self, t_list):
        simplified_sentences = []
        for this in t_list:
            processed_text = " ".join(self.sent_simpl.traversalAndSimplification(this))
            processed_text = processed_text.replace(",", "")
            processed_text = re.sub(' +', ' ', processed_text).strip()
            if not processed_text.endswith('.'):
                processed_text += ' .'
            simplified_sentences.append(processed_text)

        return simplified_sentences

    # STEP 3: run question generation on every simplified sentence
    def QuesGen(self, simplified_sentences):
        final_q_list = []
        for this in simplified_sentences:
            final_q_list.extend(self.QG.QG(this))
        return final_q_list
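A hedged usage sketch showing how the pipeline's stages are meant to chain together, following the method names above. StanfordNLP, Simplification and QuestionGeneration come from the surrounding project (and typically need a running CoreNLP server), so this only runs inside that environment; the example text is made up.

pipeline = QGPipeline()
text = "Marie Curie was born in Warsaw and won two Nobel Prizes."
tree = pipeline.getParseTree(text)                 # constituency parse
clauses = pipeline.splitConj(tree)                 # split on conjunctions
sentences = pipeline.simplify_sentence(clauses)    # simplify and normalise punctuation
questions = pipeline.QuesGen(sentences)            # generate questions per sentence
print(questions)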
Example #3
def Where_Which_module(sent, sent_features):
    question = []
    simple_ques = []
    sNLP = StanfordNLP()

    # Constituency parse of the input sentence
    parse = sNLP.parse(sent)

    # where_which_inFirstPP fills simple_ques with
    # (in_first_pp, sentence, NER label set, first PP) tuples
    where_which_inFirstPP(sent, parse, simple_ques)
    if len(simple_ques) > 0:
        for in_first_pp, thisSent, nerSet, thisPP in simple_ques:
            dep_tree = sNLP.dependency_parse(thisSent)
            dep_tree = dep_tree.__next__()
            dep_tree_list = list(dep_tree.triples())
            if in_first_pp:
                # Use the leading preposition of the PP and the NER label
                # to decide how the 'Where/Which' question is phrased
                case = thisPP.split(" ")[0]
                if "COUNTRY" in nerSet:
                    ner_type = "country"
                elif "LOCATION" in nerSet:
                    ner_type = "location"
                elif "CITY" in nerSet:
                    ner_type = "city"
                else:
                    ner_type = "place"
                return [construct_where_which(thisSent, dep_tree_list, case, ner_type)]
            else:
                # Otherwise fall back to a traversal over the dependency triples
                where_which_parseTraversal(thisSent, dep_tree_list, sNLP.ner(thisSent), question)
                return question
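Both this module and the 'When' module consume the StanfordNLP wrapper's dependency parse through __next__() and triples(). The sketch below reproduces that pattern directly with nltk's CoreNLPDependencyParser, which is an assumption about what the wrapper is built on; it requires a CoreNLP server listening on localhost:9000, and the sentence is illustrative.

from nltk.parse.corenlp import CoreNLPDependencyParser

parser = CoreNLPDependencyParser(url='http://localhost:9000')  # assumed server location
dep_parse = parser.parse("In Paris the company opened an office .".split())
dep_tree = dep_parse.__next__()            # best parse for the sentence
dep_tree_list = list(dep_tree.triples())   # ((gov, tag), relation, (dep, tag)) triples
for t in dep_tree_list:
    print(t)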
Example #4
def why_q(sents):
    # Preprocessing: parse the sentence and strip modifiers before tagging
    sNLP = StanfordNLP()
    parse = sNLP.parse(sents)
    sents = What_Who_QG.remove_modifiers(parse)

    tokenized_sentences = []
    question = ""

    tokenized_sentences.append(word_tokenize(sents))
    for sent in tokenized_sentences:
        pos_tags = nltk.pos_tag(sent)
        # Lower-case the first token unless it is a proper noun
        if (pos_tags[0][1] != 'NNP') and (pos_tags[0][1] != 'NNPS'):
            pos_tags[0] = (pos_tags[0][0].lower(), pos_tags[0][1])
        q_list = copy.deepcopy(pos_tags)

        # Rewrite around the first finite verb:
        #   VBD        -> prepend "Why did" and lemmatize the verb
        #   VBZ (aux)  -> move the auxiliary to the front, after "Why"
        #   VBZ (main) -> prepend "Why does" and lemmatize the verb
        #   VBP        -> move the verb to the front, after "Why"
        for i in range(len(pos_tags)):
            if pos_tags[i][1] == 'VBD':
                q_list[i] = (wnl.lemmatize(pos_tags[i][0], pos='v'), 'VBD')
                q_list.insert(0, ('Why did', 0))
                break
            elif pos_tags[i][1] == 'VBZ':
                if pos_tags[i][0] in aux_words:
                    q_list.insert(0, q_list.pop(i))
                    q_list.insert(0, ("Why", 0))
                else:
                    q_list[i] = (wnl.lemmatize(pos_tags[i][0], pos='v'), "VBZ")
                    if q_list[i][0] == "do":
                        q_list.pop(i)
                    q_list.insert(0, ("Why does", 0))
                break
            elif pos_tags[i][1] == 'VBP':
                q_list.insert(0, q_list.pop(i))
                q_list.insert(0, ("Why", 0))
                break

        # Capitalize the first word and join the tokens into a question string
        replace_string = q_list[0][0][:1].upper() + q_list[0][0][1:]
        q_list[0] = (replace_string, 0)
        question = ' '.join([i[0] for i in q_list])

        # Cut off everything from the first cue word in why_keys onwards
        # (the clause that answers the question), then add the question mark
        ind = -1
        for k in why_keys:
            if question.find(k) != -1:
                ind = question.find(k)
                break
        if ind != -1:
            question = question[:ind - 1]
        question = question + "?"

    if question != "":
        return question
    else:
        return None
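A self-contained illustration of the VBD branch above, using only nltk and assuming the usual tokenizer/tagger data is installed; the snippet's wnl, aux_words and why_keys are module-level objects defined elsewhere in the project, and the sentence, the 'because' cue and the printed output here are illustrative.

import nltk
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer

wnl = WordNetLemmatizer()
tags = nltk.pos_tag(word_tokenize("Alice moved to Berlin because rents rose."))

# Find the first past-tense verb, lemmatize it, and prepend "Why did"
for i, (word, tag) in enumerate(tags):
    if tag == 'VBD':
        tags[i] = (wnl.lemmatize(word, pos='v'), tag)
        tags.insert(0, ('Why did', ''))
        break

question = ' '.join(w for w, _ in tags)
question = question[:question.find('because') - 1] + '?'
print(question)  # typically: Why did Alice move to Berlin?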
Example #5
def bin_question(sents):
    # Preprocessing: parse the sentence and strip modifiers before tagging
    sNLP = StanfordNLP()
    parse = sNLP.parse(sents)
    sents = What_Who_QG.remove_modifiers(parse)

    tokenized_sentences = []
    tokenized_sentences.append(word_tokenize(sents))

    aux_words = {'are', 'was', 'were', 'is', 'have', 'has'}
    question_set = []
    for sent in tokenized_sentences:
        pos_tags = nltk.pos_tag(sent)

        # Lower-case the first token unless it is a proper noun
        if (pos_tags[0][1] != 'NNP') and (pos_tags[0][1] != 'NNPS'):
            pos_tags[0] = (pos_tags[0][0].lower(), pos_tags[0][1])
        q_list = copy.deepcopy(pos_tags)

        # Form a yes/no question from the first auxiliary or finite verb:
        #   auxiliary -> move it to the front
        #   VBD       -> prepend "Did" and lemmatize the verb
        #   VBZ       -> prepend "Does" and lemmatize the verb
        #   VBP       -> prepend "Do" and lemmatize the verb
        for i in range(len(pos_tags)):
            if pos_tags[i][0] in aux_words:
                q_list.insert(0, q_list.pop(i))
                break
            elif pos_tags[i][1] == 'VBD':
                q_list[i] = (wnl.lemmatize(pos_tags[i][0], pos='v'), 'VBD')
                q_list.insert(0, ('Did', 0))
                break
            elif pos_tags[i][1] == 'VBZ':
                q_list[i] = (wnl.lemmatize(pos_tags[i][0], pos='v'), "VBZ")
                q_list.insert(0, ("Does", 0))
                break
            elif pos_tags[i][1] == 'VBP':
                q_list[i] = (wnl.lemmatize(pos_tags[i][0], pos='v'), "VBP")
                q_list.insert(0, ("Do", 0))
                break

        # Keep the sentence only if it now starts with a question word
        if q_list[0][0].lower() in [
                'are', 'was', 'were', 'is', 'have', 'has', 'did', 'do', 'does'
        ]:
            replace_string = q_list[0][0][:1].upper() + q_list[0][0][1:]
            q_list[0] = (replace_string, 0)
            question = ' '.join([i[0] for i in q_list])
            # Drop the trailing " ." and append a question mark
            question = question[:-2]
            question = question + "?"

            question_set.append(question)

    return question_set
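A self-contained sketch of the auxiliary-fronting branch above, using only nltk's tokenizer; the sentence and the printed result are illustrative.

from nltk import word_tokenize

aux_words = {'are', 'was', 'were', 'is', 'have', 'has'}
tokens = word_tokenize("Berlin is the capital of Germany.")

# Move the first auxiliary to the front to form a yes/no question
for i, word in enumerate(tokens):
    if word in aux_words:
        tokens.insert(0, tokens.pop(i))
        break

question = ' '.join(tokens)
question = question[:-2]                             # drop the trailing " ."
question = question[0].upper() + question[1:] + '?'
print(question)  # Is Berlin the capital of Germany?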