Example #1
0
def getHead(syntac_sen):
    """Return the head word of a parsed sentence.

    The bracketed parse held in ``syntac_sen.text`` is loaded into a
    ``ParentedTree``.  Starting from the tree's first child, the function
    repeatedly steps down to a "head" child until it reaches a node of
    height 2, and returns that node's first leaf.

    Each step is chosen as follows:

    1. If the current node is labelled ``SBARQ`` and has a child labelled
       WHNP/WHPP/WHADJP/WHADVP with more than one child, step into that
       child.
    2. Otherwise apply the rules in ``head_trace_rule`` for the current
       label, in order, via the ``LookBy*`` helpers; the first result that
       is not ``""`` wins.  If no rule yields a result, step into the first
       child.
    3. After a rule-1 step, if the leaf/POS string from ``getLeafPOS``
       contains an ``NN``/``NNS`` entry immediately followed by ``POS_``,
       the indexed leaf becomes the head (wrapped in a new two-level tree).
    4. If the candidate head is one of a fixed list of generic nouns
       ("name", "kind", "type", ...), continue the search in a ``PP`` child
       of the node just descended from, or in that node's right sibling
       when the sibling is a ``PP``.

    :param syntac_sen: object whose ``text`` attribute is a bracketed parse
        string accepted by ``ParentedTree``.
    :return: the first leaf of the selected head node.
    :raises KeyError: if a visited node label has no entry in
        ``head_trace_rule``.
    """
    t = ParentedTree(syntac_sen.text)
    wh_labels = ["WHNP", "WHPP", "WHADJP", "WHADVP"]
    generic_nouns = ["name", "kind", "type", "genre", "group", "part"]

    target = t[0]

    while target.height() != 2:
        # Node we are about to descend from; rule no.3 looks at it again.
        parent = target

        ### non-trivial rules: no.1
        flag = 0
        if target.node == "SBARQ":
            for ts in target:
                if ts.node in wh_labels and len(ts) > 1:
                    target = ts
                    flag = 1
                    break

        if not flag:
            # An unknown label raises KeyError here, as it always has.
            rules = head_trace_rule[target.node]
            # Reset on every pass: without this, an empty rule list or an
            # unrecognised direction code would read an unbound name or a
            # stale result left over from the previous iteration.
            newTarget = ""
            for rule in rules:
                if rule[0] == "L":
                    newTarget = LookByL(target, rule[1:])
                elif rule[0] == "R":
                    newTarget = LookByR(target, rule[1:])
                elif rule[0] == "LBP":
                    newTarget = LookByLBP(target, rule[1:])
                elif rule[0] == "RBP":
                    newTarget = LookByRBP(target, rule[1:])
                if newTarget != "":
                    break
            if newTarget == "":
                # No rule produced a head: fall back to the first child.
                target = target[0]
            else:
                target = newTarget

        ### non-trivial rules: no.2
        if flag:
            leafPos = getLeafPOS(target)
            m = re.search(r'(NN|NNS)_(\d+) POS_', leafPos)
            if m is not None:
                lvs = target.leaves()
                # Parenthesised so the same tuple is printed on Python 2
                # and the statement also parses on Python 3.
                print(m.groups())
                target = ParentedTree(
                    "(" + m.group(1) + " " + lvs[int(m.group(2))] + ")")

        ### non-trivial rules: no.3
        if target.height() == 2 and target.leaves()[0] in generic_nouns:
            print(parent)
            for k in parent:
                if k.node == "PP":
                    target = k
                    break
            pr = parent.right_sibling()
            # right_sibling() returns None when there is no sibling; the
            # previous loop over ``pr`` crashed in that case.  A non-empty
            # PP sibling takes precedence over a PP child found above.
            if pr is not None and len(pr) > 0 and pr.node == "PP":
                target = pr

    return target.leaves()[0]
Example #2
0
def getHead(syntac_sen):
    """Return the head word of a parsed sentence.

    The bracketed parse held in ``syntac_sen.text`` is loaded into a
    ``ParentedTree``.  Starting from the tree's first child, the function
    repeatedly steps down to a "head" child until it reaches a node of
    height 2, and returns that node's first leaf.

    Each step is chosen as follows:

    1. If the current node is labelled ``SBARQ`` and has a child labelled
       WHNP/WHPP/WHADJP/WHADVP with more than one child, step into that
       child.
    2. Otherwise apply the rules in ``head_trace_rule`` for the current
       label, in order, via the ``LookBy*`` helpers; the first result that
       is not ``""`` wins.  If no rule yields a result, step into the first
       child.
    3. After a rule-1 step, if the leaf/POS string from ``getLeafPOS``
       contains an ``NN``/``NNS`` entry immediately followed by ``POS_``,
       the indexed leaf becomes the head (wrapped in a new two-level tree).
    4. If the candidate head is one of a fixed list of generic nouns
       ("name", "kind", "type", ...), continue the search in a ``PP`` child
       of the node just descended from, or in that node's right sibling
       when the sibling is a ``PP``.

    :param syntac_sen: object whose ``text`` attribute is a bracketed parse
        string accepted by ``ParentedTree``.
    :return: the first leaf of the selected head node.
    :raises KeyError: if a visited node label has no entry in
        ``head_trace_rule``.
    """
    t = ParentedTree(syntac_sen.text)
    wh_labels = ["WHNP", "WHPP", "WHADJP", "WHADVP"]
    generic_nouns = ["name", "kind", "type", "genre", "group", "part"]

    target = t[0]

    while target.height() != 2:
        # Node we are about to descend from; rule no.3 looks at it again.
        parent = target

        ### non-trivial rules: no.1
        flag = 0
        if target.node == "SBARQ":
            for ts in target:
                if ts.node in wh_labels and len(ts) > 1:
                    target = ts
                    flag = 1
                    break

        if not flag:
            # An unknown label raises KeyError here, as it always has.
            rules = head_trace_rule[target.node]
            # Reset on every pass: without this, an empty rule list or an
            # unrecognised direction code would read an unbound name or a
            # stale result left over from the previous iteration.
            newTarget = ""
            for rule in rules:
                if rule[0] == "L":
                    newTarget = LookByL(target, rule[1:])
                elif rule[0] == "R":
                    newTarget = LookByR(target, rule[1:])
                elif rule[0] == "LBP":
                    newTarget = LookByLBP(target, rule[1:])
                elif rule[0] == "RBP":
                    newTarget = LookByRBP(target, rule[1:])
                if newTarget != "":
                    break
            if newTarget == "":
                # No rule produced a head: fall back to the first child.
                target = target[0]
            else:
                target = newTarget

        ### non-trivial rules: no.2
        if flag:
            leafPos = getLeafPOS(target)
            m = re.search(r'(NN|NNS)_(\d+) POS_', leafPos)
            if m is not None:
                lvs = target.leaves()
                # Parenthesised so the same tuple is printed on Python 2
                # and the statement also parses on Python 3.
                print(m.groups())
                target = ParentedTree(
                    "(" + m.group(1) + " " + lvs[int(m.group(2))] + ")")

        ### non-trivial rules: no.3
        if target.height() == 2 and target.leaves()[0] in generic_nouns:
            print(parent)
            for k in parent:
                if k.node == "PP":
                    target = k
                    break
            pr = parent.right_sibling()
            # right_sibling() returns None when there is no sibling; the
            # previous loop over ``pr`` crashed in that case.  A non-empty
            # PP sibling takes precedence over a PP child found above.
            if pr is not None and len(pr) > 0 and pr.node == "PP":
                target = pr

    return target.leaves()[0]