def match(s, p):
    """Match a single pattern against ``s`` and return the adjacent noun phrase.

    A pattern carries a two-character slot marker on one side:

    * ``"_ <words>"`` -- the slot comes first, so the text *before* the
      pattern words is tagged and passed to ``npc.extLastNP``.
    * ``"<words> _"`` -- the slot comes last, so the text *after* the
      pattern words is tagged and passed to ``npc.extFirstNP``.

    The pattern words are looked up in ``s`` surrounded by single spaces, so
    an occurrence at the very start or end of ``s`` (with no space on that
    side) does not match.

    Args:
        s: The sentence to search.
        p: The pattern string, including its ``_`` slot marker.

    Returns:
        Whatever ``npc.extFirstNP`` / ``npc.extLastNP`` returns for the
        selected text (presumably the noun phrase nearest the pattern --
        confirm against ``npc``), or ``""`` when the pattern is not in ``s``.
    """
    # TODO: for future improvement, using getMentions instead
    slot_leads = p.startswith('_')

    # Drop the slot marker ("_ " prefix or " _" suffix) and pad with spaces so
    # the pattern only matches on space-delimited boundaries.
    core = p[2:] if slot_leads else p[:-2]
    needle = ' ' + core + ' '

    if needle not in s:
        return ""

    pieces = s.split(needle)
    if slot_leads:
        # Slot precedes the pattern: use the text before the first occurrence.
        return npc.extLastNP(npc.postag(pieces[0]))
    # Slot follows the pattern: use the text between the first occurrence and
    # the next one (or the end of the sentence).
    return npc.extFirstNP(npc.postag(pieces[1]))
def get_patterns(s, ins):
    """Collect context patterns around each mention of ``ins`` in ``s``.

    Both strings are POS-tagged with ``npc.postag`` and ``get_mentions`` is
    asked for the mentions of the tagged instance in the tagged sentence.
    For every mention three extraction rules are tried:

    * ``ext1rule`` and ``ext2rule`` over the window that precedes the
      mention; each non-empty result is stored as ``"<result> _"``.
    * ``ext3rule`` over the window that follows the mention; a non-empty
      result is stored as ``"_ <result>"``.

    Notes carried over from the original comments (the mapping to the
    individual rules is presumed -- confirm against the ``ext*rule``
    definitions):
        preceding: [nouns] - verbs - adjectives/prepositions/determiners
        preceding: nouns&adjectives - adjectives/prepositions/determiners
        following: verbs - noun phrases/preposition

    Args:
        s: The sentence to analyse.
        ins: The instance text whose mentions anchor the patterns.

    Returns:
        A list of pattern strings, in mention order, each containing a ``_``
        slot marker where the instance stood.
    """
    tagged_sentence = npc.postag(s)
    tagged_instance = npc.postag(ins)
    # NOTE(review): treated here as the start index of each mention within
    # the tagged sentence -- confirm against get_mentions.
    mention_starts = get_mentions(tagged_sentence, tagged_instance)

    patterns = []
    mention_count = len(mention_starts)
    instance_len = len(tagged_instance)
    last_index = len(tagged_sentence) - 1

    # The preceding window starts at the sentence start, then right after the
    # previous mention.
    window_start = 0
    for i, start in enumerate(mention_starts):
        end = start + instance_len

        # The following window stops just before the next mention, or at the
        # end of the sentence for the last mention.
        if i + 1 < mention_count:
            window_end = mention_starts[i + 1] - 1
        else:
            window_end = last_index

        before_first = ext1rule(tagged_sentence, window_start, start - 1)
        before_second = ext2rule(tagged_sentence, window_start, start - 1)
        after = ext3rule(tagged_sentence, end, window_end)

        if before_first:
            patterns.append(before_first + ' _')
        if before_second:
            patterns.append(before_second + ' _')
        if after:
            patterns.append('_ ' + after)

        window_start = end

    return patterns