def is_how(const_tree):
    """Split a sentence into a statement and the manner ("how") it was done.

    Two strategies are tried in order:

    1. ``findBy`` on the sentence and its verb phrase (the "by ..." form).
    2. ``findAdv`` on a deep copy of the sentence (the adverb form), rejected
       when the first word of the adverbial is in the ``badadv`` stop list.

    Args:
        const_tree: Parse tree of the sentence. Must support
            ``copy(deep=True)``, ``len()`` and ``leaves()``.

    Returns:
        A 2-tuple ``(tree, how_text)``. For the "by" strategy ``tree`` is
        whatever ``findBy`` returned as its first element; for the adverb
        strategy it is the deep copy that was handed to ``findAdv``.
        ``(None, None)`` when neither strategy applies.
    """
    # Copy is taken before findBy runs so the adverb fallback works on its
    # own tree (presumably because findBy may modify const_tree -- confirm).
    cpy = const_tree.copy(deep=True)
    # Adverbs that do not describe manner and make poor "how" answers.
    badadv = [
        'all', 'almost', 'already', 'also', 'basically', 'further',
        'finally', 'generally', 'greatly', 'however', 'initially', 'just',
        'later', 'largely', 'longer', 'mostly', 'meanwhile', 'often', 'only',
        'perhaps', 'now', 'then', 'typically',
    ]

    # try by
    vbphr = getVP(const_tree, len(const_tree))
    thing, bydoing = findBy(const_tree, vbphr)
    # Identity checks: "!= None" would go through the tree's own __ne__.
    if thing is not None:
        return thing, " ".join(bydoing.leaves())

    # try adv
    thing, how = findAdv(cpy, getVP(cpy, len(cpy)))
    if thing is None or leftmost(how)[0].lower() in badadv:
        return None, None
    return cpy, " ".join(how.leaves())
def who(const_tree, nertags):
    """Build a "Who ...?" question from a sentence.

    Args:
        const_tree: Parse tree of the sentence.
        nertags: Named-entity tags, forwarded unchanged to ``is_who``.

    Returns:
        The string ``"Who <verb phrase>?"`` when ``is_who`` returns a truthy
        value and ``getVP`` yields a non-empty verb phrase; otherwise None.
    """
    if not is_who(const_tree, nertags):
        return None

    verb_phrase = getVP(const_tree, len(const_tree))
    if not verb_phrase:
        return None

    # Every child of the verb phrase contributes " <its words>".
    tail = ''.join(' ' + ' '.join(child.leaves()) for child in verb_phrase)
    return "Who" + tail + "?"
def reason_cause(const_tree):
    """Detach a "because ..." clause from a sentence.

    The direct children of ``const_tree`` are scanned first, then the
    children of the verb phrase returned by ``getVP``. The first matching
    clause is removed from its parent in place.

    Args:
        const_tree: Parse tree of the sentence; modified in place on a match.

    Returns:
        ``(const_tree, cause)`` on a match, else ``(None, None)``.

        NOTE(review): ``cause`` is the clause node itself when the clause is
        a direct child of the sentence, but the space-joined clause text when
        it was found inside the verb phrase -- confirm callers handle both.
    """
    def _is_because_clause(node):
        # NOTE(review): comparison is case-sensitive, so a clause whose first
        # leaf is "Because" is not matched -- confirm this is intended.
        return node.label() == 'SBAR' and leftmost(node)[0] == 'because'

    # Clause sitting directly under the sentence node.
    for clause in const_tree:
        if _is_because_clause(clause):
            const_tree.remove(clause)
            return const_tree, clause

    # Clause nested inside the verb phrase.
    verb_phrase = getVP(const_tree, len(const_tree))
    if not verb_phrase:
        return None, None
    for clause in verb_phrase:
        if _is_because_clause(clause):
            verb_phrase.remove(clause)
            return const_tree, " ".join(clause.leaves())
    return None, None
def is_who(const_tree, nertags):
    """Return the entity in the sentence's noun phrase that "who" can ask about.

    The words of the noun phrase returned by ``getNP`` are run through spaCy
    and its recognised entities are filtered by label.

    Args:
        const_tree: Parse tree of the sentence.
        nertags: Unused here; kept so the signature matches the other
            ``is_*`` detectors.

    Returns:
        The text of the last entity labelled PERSON, ORG, GPE or NORP, or
        None when the sentence lacks a noun phrase or verb phrase, or when no
        such entity is found.
    """
    vbphr = getVP(const_tree, len(const_tree))
    nphr = getNP(const_tree, len(const_tree))
    if not (vbphr and nphr):
        return None

    poss_who = ''.join(' ' + ' '.join(n.leaves()) for n in nphr)

    # NOTE(review): the model is loaded on every call; consider caching it at
    # module level if this function turns out to be hot.
    spacy_nlp = spacy.load('en')
    noun = spacy_nlp(poss_who)

    # The previous test `label_ == ("PERSON" or "ORG" or "GPE" or "NORP")`
    # collapsed to `label_ == "PERSON"`, because `or` returns its first
    # truthy operand. Membership is what was intended.
    who_labels = ("PERSON", "ORG", "GPE", "NORP")
    who = None
    for c in noun.ents:
        if c.label_ in who_labels:
            who = c  # later matches overwrite earlier ones

    if who is None:
        return None
    return str(who)
def is_time(const_tree, nertags, answering):
    """Find a time expression in a sentence and detach it when possible.

    Search order:

    1. Prepositional phrases (label ``'PP'``) that are direct children of
       ``const_tree``.
    2. Prepositional phrases that are children of the verb phrase returned
       by ``getVP``.
    3. Only when ``answering`` is truthy: the first consecutive run of
       DATE/TIME entries in ``nertags``.

    In steps 1 and 2 a PP matches when the first word of its first noun
    phrase (from ``searchPhrase``) equals the word of a DATE/TIME entry; the
    PP is then removed from its parent in place.

    Args:
        const_tree: Parse tree of the sentence; modified in place on a
            step 1/2 match.
        nertags: Sequence of entries indexed as ``entry[0]`` (word) and
            ``entry[1]`` (entity tag).
        answering: Enables the step 3 fallback.

    Returns:
        ``(const_tree, time_text)`` on success, else ``(None, None)``. In
        step 3 ``time_text`` keeps a trailing space after each word.
    """
    timetags = ['DATE', 'TIME']

    def _starts_with_time_entity(words):
        # True when the first word is itself tagged as a DATE/TIME entity.
        return any(
            entry[1] in timetags and words[0] == entry[0]
            for entry in nertags
        )

    verb_phrase = getVP(const_tree, len(const_tree))

    # if on same level
    for child in const_tree:
        if child.label() != 'PP':
            continue
        noun_hits = searchPhrase(child, 'NP')
        if len(noun_hits) == 0:
            # NOTE(review): a PP without an NP aborts the whole scan rather
            # than skipping to the next child -- confirm this is intended.
            break
        words = ' '.join(noun_hits[0].leaves()).split()
        if _starts_with_time_entity(words):
            const_tree.remove(child)
            return const_tree, " ".join(child.leaves())

    if verb_phrase:
        # TODO should be more robust but works for now
        for child in verb_phrase:
            # Looked up for every child, before the label check.
            noun_hits = searchPhrase(child, 'NP')
            if child.label() != 'PP':
                continue
            if len(noun_hits) == 0:
                break
            words = ' '.join(noun_hits[0].leaves()).split()
            if _starts_with_time_entity(words):
                verb_phrase.remove(child)
                return const_tree, " ".join(child.leaves())

    if answering:
        # Collect the first unbroken run of time-tagged words.
        collected = []
        for entry in nertags:
            if entry[1] in timetags:
                collected.append(entry[0] + ' ')
            elif collected:
                break
        res = ''.join(collected)
        if len(res) > 0:
            return const_tree, res

    return None, None
def is_howmany(const_tree, nertags):
    """Turn a sentence containing a count into a "How many ...?" question.

    Nothing is attempted unless ``getVP`` yields a verb phrase. Three
    strategies are then tried in order, each looking for an entry of
    ``nertags`` tagged NUMBER or CARDINAL:

    1. The number is the first word of the first noun phrase found under the
       verb phrase (``searchPhrase(vp, 'NP')``).
    2. The number occurs among the words of the first prepositional phrase
       that ``searchAndRem`` extracts from a deep copy of the sentence.
    3. The number occurs among the words of the noun phrase returned by
       ``getNP``.

    Args:
        const_tree: Parse tree of the sentence.
        nertags: Sequence of entries indexed as ``entry[0]`` (word) and
            ``entry[1]`` (entity tag).

    Returns:
        ``(question, number_word)`` for the first strategy that matches,
        else ``(None, None)``.
    """
    numtags = ["NUMBER", "CARDINAL"]
    np = getNP(const_tree, len(const_tree))
    vp = getVP(const_tree, len(const_tree))
    # PPs are extracted from a copy so const_tree itself is not passed to
    # searchAndRem here.
    const_tree1 = const_tree.copy(deep=True)
    (const_tree2, preps) = searchAndRem(const_tree1, 'PP')
    preps_lst = []
    if preps:
        preps_lst = ' '.join(preps[0].leaves()).split()

    if vp:
        subnpTree = searchPhrase(vp, 'NP')
        # split()/join() collapses any repeated whitespace in the leaf text.
        verb = ' '.join(' '.join(vp[0].leaves()).split())
        lem_verb = lem(u'' + verb, u'VERB')[0]

        # Strategy 1: number leads the noun phrase under the verb phrase.
        if len(subnpTree) > 0:
            subnp = ' '.join(subnpTree[0].leaves()).split()
            for n in nertags:
                if (n[1] in numtags and subnp[0] == (n[0])):
                    # str.replace drops every occurrence of the number word.
                    restnp = (' '.join(subnp)).replace(subnp[0], "")
                    doform = getDoForm(vp[0])[0].lower() + getDoForm(vp[0])[1:]
                    q_body = ' '.join(' '.join(np.leaves()).split())
                    q_body_lower = q_body[0].lower() + q_body[1:]
                    (const_tree3, subpreps) = searchAndRem(vp, 'PP')
                    if subpreps:
                        p_str = ' '.join(' '.join(
                            subpreps[0].leaves()).split())
                        p_str_lower = p_str[0].lower() + p_str[1:]
                        ques = ('How many' + restnp + ' ' + doform + ' ' +
                                q_body_lower + ' ' + lem_verb + ' ' +
                                p_str_lower + '?')
                    else:
                        ques = ('How many' + restnp + ' ' + doform + ' ' +
                                q_body_lower + ' ' + lem_verb + '?')
                    return ques, subnp[0]

        # Strategy 2: number sits inside the first prepositional phrase.
        if (len(preps_lst) > 0):
            for n in nertags:
                if (n[1] in numtags and n[0] in preps_lst):
                    np_string = ' '.join(' '.join(np.leaves()).split())
                    np_string_lower = np_string[0].lower() + np_string[1:]
                    vp_string = ' '.join(' '.join(vp.leaves()).split())
                    no_prep = vp_string.replace(' '.join(preps_lst), "")
                    rest_preps = ' '.join(preps_lst).replace(
                        n[0], "").replace(preps_lst[0], "")
                    no_prep_verb = no_prep.replace(verb, "")
                    try:
                        # Best effort: also strip the determiner from the PP.
                        (prep_tree, det) = searchAndRem(preps, 'DT')
                        rest_preps2 = ' '.join(' '.join(
                            prep_tree[0].leaves()).split())
                        final_preps = rest_preps2.replace(
                            ' '.join(' '.join(det[0].leaves()).split()),
                            "").replace(n[0], "").replace(preps_lst[0], "")
                        ques = ('How many' + final_preps + ' ' + verb + ' ' +
                                np_string_lower + no_prep_verb + " " +
                                preps_lst[0] + "?")
                    except Exception:
                        # Determiner stripping failed; fall back to the plain
                        # PP remainder. Narrowed from a bare except so that
                        # KeyboardInterrupt/SystemExit still propagate.
                        ques = ('How many' + rest_preps + ' ' + verb + ' ' +
                                np_string_lower + no_prep_verb + " " +
                                preps_lst[0] + "?")
                    return ques, n[0]

        # Strategy 3: number occurs inside the noun phrase from getNP.
        np_list = ' '.join(np.leaves()).split()
        for n in nertags:
            if (n[1] in numtags and n[0] in np_list):
                q_body = ' '.join(' '.join(const_tree.leaves()).split())
                q_body1 = q_body.replace(n[0], "")
                q_body1_lower = q_body1[0].lower() + q_body1[1:]
                # Default question body, used if determiner removal fails.
                q_body2 = q_body1_lower
                try:
                    (np_tree, det) = searchAndRem(np, 'DT')
                    rest_np = ' '.join(' '.join(np_tree.leaves()).split())
                    vp_str = ' '.join(' '.join(vp.leaves()).split())
                    q_body2 = rest_np.replace(
                        ' '.join(' '.join(det[0].leaves()).split()),
                        "").replace(n[0], "") + ' ' + vp_str
                except Exception:
                    # Keep the default q_body2 set above. Narrowed from a
                    # bare except so interpreter-exit signals propagate.
                    pass
                ques = 'How many ' + q_body2 + "?"
                return ques, n[0]
        return None, None
    return None, None