Esempio n. 1
0
def get_binary_for_markedup(left,
                            right,
                            result,
                            markedup=None,
                            flexible=False):
    """Find the rule list for combining ``left`` and ``right`` into ``result``.

    Each entry of ``BINARIES`` is read positionally: index 0, 1 and 2 are
    compared (via ``category.compare``) against ``left``, ``right`` and
    ``result``, and index 4 holds the entry's rule list.

    Args:
        left: Category of the left child.
        right: Category of the right child.
        result: Category of the parent; also the key used in ``markedup``.
        markedup: Optional mapping indexed by category. When given, its entry
            for ``result`` (minus the first element) is used to build a rule
            list whenever no explicit rules are available.
        flexible: When true, and no entry matches all three categories, accept
            the first entry whose result category alone matches.

    Returns:
        The matched entry's rules if it has any; otherwise
        ``['(S 0 1)'] + markedup[result][1:]`` when ``markedup`` is given, or
        ``[]`` when an entry matched but there is no ``markedup``. Returns
        ``None`` when nothing matched and ``markedup`` is ``None``.
    """

    def resolve(rules):
        # Explicit rules win; otherwise fall back to markup, then to [].
        if len(rules) > 0:
            return rules
        if markedup is None:
            return []
        return ['(S 0 1)'] + markedup[result][1:]

    # First pass: left, right and result must all match the entry.
    for entry in BINARIES:
        if not category.compare(left, entry[0]):
            continue
        if not category.compare(right, entry[1]):
            continue
        if category.compare(result, entry[2]):
            return resolve(entry[4])

    # Second pass (opt-in): a match on the result category alone is enough.
    if flexible:
        for entry in BINARIES:
            if category.compare(result, entry[2]):
                return resolve(entry[4])

    # Nothing in the table applied.
    if markedup is None:
        return None
    return ['(S 0 1)'] + markedup[result][1:]
Esempio n. 2
0
def get_binary_for_markedup(left, right, result, markedup=None, flexible=False):
	"""Find the rule list for combining ``left`` and ``right`` into ``result``.

	Each entry of ``BINARIES`` is read positionally: index 0, 1 and 2 are
	compared (via ``category.compare``) against ``left``, ``right`` and
	``result``, and index 4 holds the entry's rule list.

	Args:
		left: Category of the left child.
		right: Category of the right child.
		result: Category of the parent; also the key used in ``markedup``.
		markedup: Optional mapping indexed by category. When given, its entry
			for ``result`` (minus the first element) is used to build a rule
			list whenever no explicit rules are available.
		flexible: When true, and no entry matches all three categories, accept
			the first entry whose result category alone matches.

	Returns:
		The matched entry's rules if it has any; otherwise
		``['(S 0 1)'] + markedup[result][1:]`` when ``markedup`` is given, or
		``[]`` when an entry matched but there is no ``markedup``. Returns
		``None`` when nothing matched and ``markedup`` is ``None``.
	"""

	def resolve(rules):
		# Explicit rules win; otherwise fall back to markup, then to [].
		if len(rules) > 0:
			return rules
		if markedup is None:
			return []
		return ['(S 0 1)'] + markedup[result][1:]

	# First pass: left, right and result must all match the entry.
	for entry in BINARIES:
		if not category.compare(left, entry[0]):
			continue
		if not category.compare(right, entry[1]):
			continue
		if category.compare(result, entry[2]):
			return resolve(entry[4])

	# Second pass (opt-in): a match on the result category alone is enough.
	if flexible:
		for entry in BINARIES:
			if category.compare(result, entry[2]):
				return resolve(entry[4])

	# Nothing in the table applied.
	if markedup is None:
		return None
	return ['(S 0 1)'] + markedup[result][1:]
Esempio n. 3
0
def get_unary(start_cat, end_cat, markedup=None):
    """Find the rule list for the unary step ``start_cat`` -> ``end_cat``.

    Each entry of ``UNARIES`` is read positionally: index 0 is the source
    category, index 1 the target category with markup (its braces are removed
    with ``category.strip_braces`` before comparing), and index 4 the rules.

    Args:
        start_cat: Category before the unary step.
        end_cat: Category after the unary step.
        markedup: Optional mapping indexed by category, consulted only when
            the matching entry has no rules of its own.

    Returns:
        The matching entry's rules if non-empty. Otherwise, with ``markedup``
        given, the markup entry for the target category (or for the target
        with square brackets stripped) minus its first element; if neither
        key is present the search continues with the next entry. Without
        ``markedup`` a rule-less match yields ``[]``. ``None`` if the table is
        exhausted.
    """
    # Note: PP_qus - for questions only, ignored for now
    for entry in UNARIES:
        source = entry[0]
        target = category.strip_braces(entry[1])
        rules = entry[4]

        matches = (category.compare(start_cat, source)
                   and category.compare(end_cat, target))
        if not matches:
            continue

        if len(rules) > 0:
            return rules
        if markedup is None:
            return []

        # Try the target as written, then without its square brackets.
        if target in markedup:
            return markedup[target][1:]
        bare_target = category.strip_square_brackets(target)
        if bare_target in markedup:
            return markedup[bare_target][1:]
        # Neither key is known: keep scanning the remaining entries.
    return None
Esempio n. 4
0
def get_unary(start_cat, end_cat, markedup=None):
	"""Find the rule list for the unary step ``start_cat`` -> ``end_cat``.

	Each entry of ``UNARIES`` is read positionally: index 0 is the source
	category, index 1 the target category with markup (its braces are removed
	with ``category.strip_braces`` before comparing), and index 4 the rules.

	Args:
		start_cat: Category before the unary step.
		end_cat: Category after the unary step.
		markedup: Optional mapping indexed by category, consulted only when
			the matching entry has no rules of its own.

	Returns:
		The matching entry's rules if non-empty. Otherwise, with ``markedup``
		given, the markup entry for the target category (or for the target
		with square brackets stripped) minus its first element; if neither
		key is present the search continues with the next entry. Without
		``markedup`` a rule-less match yields ``[]``. ``None`` if the table is
		exhausted.
	"""
	# Note: PP_qus - for questions only, ignored for now
	for entry in UNARIES:
		source = entry[0]
		target = category.strip_braces(entry[1])
		rules = entry[4]

		matches = (category.compare(start_cat, source)
			and category.compare(end_cat, target))
		if not matches:
			continue

		if len(rules) > 0:
			return rules
		if markedup is None:
			return []

		# Try the target as written, then without its square brackets.
		if target in markedup:
			return markedup[target][1:]
		bare_target = category.strip_square_brackets(target)
		if bare_target in markedup:
			return markedup[bare_target][1:]
		# Neither key is known: keep scanning the remaining entries.
	return None
Esempio n. 5
0
    def __init__(self, text='', pos=0):
        """Build a tree node by parsing the bracketed string ``text`` from ``pos``.

        Two input styles are handled. If ``text`` contains '<L' anywhere, the
        node label is taken from an angle-bracketed header following the
        opening bracket; otherwise the "fowler" style is assumed, where the
        label is the first token after the opening bracket.

        Child nodes are built recursively as ``CCG_Tree(text, i)`` and appended
        to ``self.subtrees``.

        Args:
            text: The full bracketed string. An empty string yields a node with
                only the default attribute values.
            pos: Offset in ``text`` at which this node starts (presumably the
                index of its opening '(' -- TODO confirm against callers).
        """
        Tree.__init__(self, text)
        self.label = ''
        self.category = None
        self.orig_category = None  # category as read, before any fix-ups
        self.pos = None  # tag attribute; unrelated to the ``pos`` argument
        self.word = None
        self.head = None
        self.rule = None
        if text == '':
            return
        # NOTE: this looks at the whole of ``text``, not just the part from ``pos``.
        if '<L' in text:
            depth = 0
            for i in xrange(pos + 1, len(text)):
                char = text[i]
                # update the depth (note that brackets in categories only muck things up
                # for the category that is the root of this subtree)
                if char == '(':
                    depth += 1
                    # Only direct children (depth 1) seen after this node's own
                    # label has been read become subtrees.
                    if self.label != '' and depth == 1:
                        self.subtrees.append(CCG_Tree(text, i))
                elif char == ')':
                    depth -= 1
                # we've reached the end of the category that is the root of this subtree
                if char == '>' and self.label == '':
                    # pos + 2 skips the two characters that open the header.
                    self.label = text[pos + 2:i]
                # we've reached the end of the scope for this bracket
                if depth < 0:
                    break
            # The label is a whitespace-separated record; field 1 is the category.
            parts = self.label.split()
            # Remove every '[X]' occurrence from the category.
            self.category = ''.join(parts[1].split('[X]'))
            self.orig_category = self.category
            # Fix a sentence with two broken categories in CCGBank (0595.15)
            # (a category ending in a slash gets 'NP' appended; orig_category
            # keeps the unrepaired value).
            if self.category[-1] in '\\/':
                self.category = self.category + 'NP'
            self.rule = rule.determine_combinator(self.subtrees, self.category)
            # For a conjunction rule whose category is neither marked '[conj]'
            # nor comparable to the second child's category, take the second
            # child's category instead, with a '[conj]' suffix.
            if 'conj' in self.rule:
                if not self.category.endswith(
                        '[conj]') and not category.compare(
                            self.category, self.subtrees[1].category):
                    if self.subtrees[1].category.endswith('[conj]'):
                        self.category = self.subtrees[1].category
                    else:
                        self.category = self.subtrees[1].category + '[conj]'
            if len(parts) == 4:
                # Four-field label: the head defaults to the first child;
                # field 2 equal to '1' selects the second of two children.
                if len(self.subtrees) > 0:
                    self.head = self.subtrees[0]
                if parts[2] == '1' and len(self.subtrees) == 2:
                    self.head = self.subtrees[1]
            elif len(parts) == 6:
                # Six-field label: fields 3 and 4 give the tag and the word.
                self.pos = parts[3]
                self.word = parts[4]
        else:
            # Handle fowler input
            # Label: first whitespace-delimited token from ``pos`` without its
            # first character.
            self.label = text[pos:].split()[0][1:]
            # Category: the label with '{' -> '(' and '}' -> ')'.
            self.category = ')'.join('('.join(
                self.label.split('{')).split('}'))
            self.orig_category = self.category

            depth = 0
            for i in xrange(pos + 1, len(text)):
                if depth < 0:
                    break
                char = text[i]
                # update the depth
                if char == '(':
                    depth += 1
                    if depth == 1:
                        self.subtrees.append(CCG_Tree(text, i))
                elif char == ')':
                    depth -= 1
                    if len(self.subtrees) == 0:
                        # Closing bracket with no children: the word is the
                        # text between the nearest preceding space and this
                        # bracket. Note that this reuses (overwrites) ``pos``.
                        pos = i
                        for j in xrange(i, 0, -1):
                            if text[j] == ' ':
                                pos = j
                                break
                        self.word = text[pos + 1:i]
                        break

            self.rule = rule.determine_combinator(self.subtrees, self.category)
            # Same conjunction fix-up as in the '<L' branch above.
            if 'conj' in self.rule:
                if not self.category.endswith(
                        '[conj]') and not category.compare(
                            self.category, self.subtrees[1].category):
                    if self.subtrees[1].category.endswith('[conj]'):
                        self.category = self.subtrees[1].category
                    else:
                        self.category = self.subtrees[1].category + '[conj]'
            if self.word is not None:
                # This input style carries no tag: use "UNK", except for a few
                # punctuation words that get a fixed tag.
                self.pos = "UNK"
                if self.word == '.':
                    self.pos = '.'
                if self.word == ',':
                    self.pos = ','
                if self.word == '...':
                    self.pos = ':'
                if self.word == '?':
                    self.pos = '.'
                if self.word == '!':
                    self.pos = '.'
Esempio n. 6
0
    def __init__(self, text="", pos=0):
        """Build a tree node by parsing the bracketed string ``text`` from ``pos``.

        Two input styles are handled. If ``text`` contains "<L" anywhere, the
        node label is taken from an angle-bracketed header following the
        opening bracket; otherwise the "fowler" style is assumed, where the
        label is the first token after the opening bracket.

        Child nodes are built recursively as ``CCG_Tree(text, i)`` and appended
        to ``self.subtrees``.

        Args:
            text: The full bracketed string. An empty string yields a node with
                only the default attribute values.
            pos: Offset in ``text`` at which this node starts (presumably the
                index of its opening "(" -- TODO confirm against callers).
        """
        Tree.__init__(self, text)
        self.label = ""
        self.category = None
        self.orig_category = None  # category as read, before any fix-ups
        self.pos = None  # tag attribute; unrelated to the ``pos`` argument
        self.word = None
        self.head = None
        self.rule = None
        if text == "":
            return
        # NOTE: this looks at the whole of ``text``, not just the part from ``pos``.
        if "<L" in text:
            depth = 0
            for i in xrange(pos + 1, len(text)):
                char = text[i]
                # update the depth (note that brackets in categories only muck things up
                # for the category that is the root of this subtree)
                if char == "(":
                    depth += 1
                    # Only direct children (depth 1) seen after this node's own
                    # label has been read become subtrees.
                    if self.label != "" and depth == 1:
                        self.subtrees.append(CCG_Tree(text, i))
                elif char == ")":
                    depth -= 1
                # we've reached the end of the category that is the root of this subtree
                if char == ">" and self.label == "":
                    # pos + 2 skips the two characters that open the header.
                    self.label = text[pos + 2 : i]
                # we've reached the end of the scope for this bracket
                if depth < 0:
                    break
            # The label is a whitespace-separated record; field 1 is the category.
            parts = self.label.split()
            # Remove every "[X]" occurrence from the category.
            self.category = "".join(parts[1].split("[X]"))
            self.orig_category = self.category
            # Fix a sentence with two broken categories in CCGBank (0595.15)
            # (a category ending in a slash gets "NP" appended; orig_category
            # keeps the unrepaired value).
            if self.category[-1] in "\\/":
                self.category = self.category + "NP"
            self.rule = rule.determine_combinator(self.subtrees, self.category)
            # For a conjunction rule whose category is neither marked "[conj]"
            # nor comparable to the second child's category, take the second
            # child's category instead, with a "[conj]" suffix.
            if "conj" in self.rule:
                if not self.category.endswith("[conj]") and not category.compare(
                    self.category, self.subtrees[1].category
                ):
                    if self.subtrees[1].category.endswith("[conj]"):
                        self.category = self.subtrees[1].category
                    else:
                        self.category = self.subtrees[1].category + "[conj]"
            if len(parts) == 4:
                # Four-field label: the head defaults to the first child;
                # field 2 equal to "1" selects the second of two children.
                if len(self.subtrees) > 0:
                    self.head = self.subtrees[0]
                if parts[2] == "1" and len(self.subtrees) == 2:
                    self.head = self.subtrees[1]
            elif len(parts) == 6:
                # Six-field label: fields 3 and 4 give the tag and the word.
                self.pos = parts[3]
                self.word = parts[4]
        else:
            # Handle fowler input
            # Label: first whitespace-delimited token from ``pos`` without its
            # first character.
            self.label = text[pos:].split()[0][1:]
            # Category: the label with "{" -> "(" and "}" -> ")".
            self.category = ")".join("(".join(self.label.split("{")).split("}"))
            self.orig_category = self.category

            depth = 0
            for i in xrange(pos + 1, len(text)):
                if depth < 0:
                    break
                char = text[i]
                # update the depth
                if char == "(":
                    depth += 1
                    if depth == 1:
                        self.subtrees.append(CCG_Tree(text, i))
                elif char == ")":
                    depth -= 1
                    if len(self.subtrees) == 0:
                        # Closing bracket with no children: the word is the
                        # text between the nearest preceding space and this
                        # bracket. Note that this reuses (overwrites) ``pos``.
                        pos = i
                        for j in xrange(i, 0, -1):
                            if text[j] == " ":
                                pos = j
                                break
                        self.word = text[pos + 1 : i]
                        break

            self.rule = rule.determine_combinator(self.subtrees, self.category)
            # Same conjunction fix-up as in the "<L" branch above.
            if "conj" in self.rule:
                if not self.category.endswith("[conj]") and not category.compare(
                    self.category, self.subtrees[1].category
                ):
                    if self.subtrees[1].category.endswith("[conj]"):
                        self.category = self.subtrees[1].category
                    else:
                        self.category = self.subtrees[1].category + "[conj]"
            if self.word is not None:
                # This input style carries no tag: use "UNK", except for a few
                # punctuation words that get a fixed tag.
                self.pos = "UNK"
                if self.word == ".":
                    self.pos = "."
                if self.word == ",":
                    self.pos = ","
                if self.word == "...":
                    self.pos = ":"
                if self.word == "?":
                    self.pos = "."
                if self.word == "!":
                    self.pos = "."
Esempio n. 7
0
def determine_combinator(source, result):
    """Name the rule that combines the nodes in ``source`` into ``result``.

    Args:
        source: Child nodes in left-to-right order. Each is read for
            ``.category``; with two children ``.rule`` may be read as well.
        result: Category string of the parent node.

    Returns:
        'lex' for no children; 'unary' or 'type' for one child; for two
        children one of 'binary', 'conj1', 'conj2', 'fa.f', 'fa.b', 'fc.f',
        'fc.b', 'cc.b', 'bs.f', 'bs.b'; and 'misc' when nothing matches or
        there are more than two children.

    The checks run in a fixed order and the first match wins, so the order
    of the tests below is part of the behaviour.
    """
    # Debug output, kept commented out:
    ###	print len(source)
    ###	print ' '.join(source), result
    if len(source) == 0:
        return 'lex'
    if len(source) == 1:
        # A known unary rule is 'unary'; any other single-child step is 'type'.
        if get_unary(source[0].category, result) is not None:
            return 'unary'
        return 'type'
    if len(source) == 2:
        left = source[0].category
        right = source[1].category
        # The divided parts are indexed below as [0] = result side,
        # [1] = slash ('/' or '\') and [2] = argument side -- inferred from
        # the rule schemas in the comments; TODO confirm in category.divide.
        result_parts = category.divide(result)
        left_parts = category.divide(left)
        right_parts = category.divide(right)

        # Explicitly listed binary rules take priority over everything else.
        if get_binary(left, right, result) is not None:
            return 'binary'

        # Coordination
        # X = X CONJ X
        if left == 'conj' or (result.endswith('[conj]')
                              and not '[conj]' in right):
            if right == 'conj\\conj':
                return 'fa.b'
            return 'conj1'
        elif 'conj' in source[1].rule or '[conj]' in right:
            # The right child is itself (part of) a coordination. Check for
            # the ordinary combinators first, then fall back to 'conj2'.
            if category.compare(left, right):
                return 'conj2'
            if category.compare(category.divide(left)[2],
                                right) and category.divide(left)[1] == '/':
                return 'fa.f'
            if category.compare(
                    category.divide(right)[0],
                    left) and category.divide(right)[1] is not None:
                # NOTE(review): 'and' binds tighter than 'or', so this reads
                # as A or (B and C) -- confirm that grouping is intended.
                if 'conj2' in source[
                        1].rule or '[conj]' in right and category.compare(
                            category.divide(right)[2], left):
                    return 'fa.b'
                else:
                    return 'conj1'
            if category.compare(category.divide(right)[2], left):
                return 'fa.b'
            # Same test as "Crossed composition" further down.
            if (category.compare(left_parts[2], result_parts[2])
                    and category.compare(left_parts[0], right_parts[2])
                    and category.compare(right_parts[0], result_parts[0])
                    and left_parts[1] == result_parts[1] == '/'
                    and right_parts[1] == '\\'):
                return 'cc.b'
            # Same tests as forward / backward "Function composition" below.
            if (category.compare(left_parts[2], right_parts[0])
                    and category.compare(left_parts[0], result_parts[0])
                    and category.compare(right_parts[2], result_parts[2]) and
                    left_parts[1] == right_parts[1] == result_parts[1] == '/'):
                return 'fc.f'
            if (category.compare(left_parts[2], result_parts[2])
                    and category.compare(left_parts[0], right_parts[2])
                    and category.compare(right_parts[0], result_parts[0])
                    and left_parts[1] == right_parts[1] == result_parts[1] ==
                    '\\'):
                return 'fc.b'
            if category.compare(result, left):
                if '[conj]' in result:
                    return 'conj2'
                raw_right = right
                if '[conj]' in right:
                    # Drops the last six characters, i.e. len('[conj]');
                    # this assumes the marker is a suffix of ``right``.
                    raw_right = right[:-6]
                if category.compare(result, raw_right):
                    return 'conj2'
                # Otherwise fall through to the generic checks below.
            else:
                return 'conj2'
        elif 'conj1' in source[0].rule or '[conj]' in left:
            return 'conj2'
        # consider conj3, to handle , separated lists

        # Function application
        # X = X/Y + Y
        if (left_parts[1] == '/' and category.compare(left_parts[2], right)
                and category.compare(left_parts[0], result)):
            return 'fa.f'
        # X = Y + X\Y
        if (right_parts[1] == '\\' and category.compare(right_parts[2], left)
                and category.compare(right_parts[0], result)):
            return 'fa.b'

        # Function composition
        # X/Z = X/Y + Y/Z
        if (category.compare(left_parts[2], right_parts[0])
                and category.compare(left_parts[0], result_parts[0])
                and category.compare(right_parts[2], result_parts[2])
                and left_parts[1] == right_parts[1] == result_parts[1] == '/'):
            return 'fc.f'
        # X\Z = Y\Z + X\Y
        if (category.compare(left_parts[2], result_parts[2])
                and category.compare(left_parts[0], right_parts[2])
                and category.compare(right_parts[0], result_parts[0]) and
                left_parts[1] == right_parts[1] == result_parts[1] == '\\'):
            return 'fc.b'

        # Crossed composition
        # X/Z = Y/Z + X\Y
        # For example:
        # (S\NP)/(S\NP) = (S\NP)/(S\NP) + (S\NP)\(S\NP)
        if (category.compare(left_parts[2], result_parts[2])
                and category.compare(left_parts[0], right_parts[2])
                and category.compare(right_parts[0], result_parts[0])
                and left_parts[1] == result_parts[1] == '/'
                and right_parts[1] == '\\'):
            return 'cc.b'
        # Not implemented (no check exists for this schema):
        # Z\X = Z/Y + Y\X
        # ((S\NP)/S)/(S\NP) = ((S\NP)/S)/(S\NP) + (S\NP)\(S\NP)

        # Backward crossed substitution
        # X/Z = B/Z + (X\B)/Z
        if (left_parts[1] == right_parts[1] == result_parts[1] == '/'
                and category.compare(left_parts[2], result_parts[2])
                and category.compare(right_parts[2], result_parts[2])):
            sub_parts = category.divide(right_parts[0])
            if (category.compare(sub_parts[0], result_parts[0])
                    and category.compare(sub_parts[2], left_parts[0])
                    and sub_parts[1] != left_parts[1]):
                return 'bs.f'
        # X\Z = (X/B)\Z + B\Z
        if (left_parts[1] == right_parts[1] == result_parts[1] == '\\'
                and category.compare(left_parts[2], result_parts[2])
                and category.compare(right_parts[2], result_parts[2])):
            sub_parts = category.divide(left_parts[0])
            # NOTE(review): this case uses plain == where the 'bs.f' case
            # above uses category.compare -- confirm the difference is intended.
            if (sub_parts[0] == result_parts[0]
                    and sub_parts[2] == right_parts[0]
                    and sub_parts[1] != right_parts[1]):
                return 'bs.b'
        # There are restrictions on what B can be, but since this is a parse, and
        # all other options have been exhausted, this must be what is going on

        # Uncomment to see what is misc:


###	if left == result and '/' not in right and '\\' not in right:
###		pass
###	elif right == result and '/' not in left and '\\' not in left:
###		pass
###	elif '[conj]' in left or '[conj]' in right or '[conj]' in result:
###		pass
###	else:
###		print 'misc rule:', left, right, result
###		print ' ', left_parts
###		print ' ', right_parts
###		print ' ', result_parts
        # Last resort: a complex result whose [0] part equals the right
        # child's category is labelled 'conj1'.
        if category.divide(result)[0] == right and category.divide(
                result)[1] is not None:
            return 'conj1'
    return 'misc'
Esempio n. 8
0
def determine_combinator(source, result):
	"""Name the rule that combines the nodes in ``source`` into ``result``.

	Args:
		source: Child nodes in left-to-right order. Each is read for
			``.category``; with two children ``.rule`` may be read as well.
		result: Category string of the parent node.

	Returns:
		'lex' for no children; 'unary' or 'type' for one child; for two
		children one of 'binary', 'conj1', 'conj2', 'fa.f', 'fa.b', 'fc.f',
		'fc.b', 'cc.b', 'bs.f', 'bs.b'; and 'misc' when nothing matches or
		there are more than two children.

	The checks run in a fixed order and the first match wins, so the order
	of the tests below is part of the behaviour.
	"""
	# Debug output, kept commented out:
###	print len(source)
###	print ' '.join(source), result
	if len(source) == 0:
		return 'lex'
	if len(source) == 1:
		# A known unary rule is 'unary'; any other single-child step is 'type'.
		if get_unary(source[0].category, result) is not None:
			return 'unary'
		return 'type'
	if len(source) == 2:
		left = source[0].category
		right = source[1].category
		# The divided parts are indexed below as [0] = result side,
		# [1] = slash ('/' or '\') and [2] = argument side -- inferred from
		# the rule schemas in the comments; TODO confirm in category.divide.
		result_parts = category.divide(result)
		left_parts = category.divide(left)
		right_parts = category.divide(right)

		# Explicitly listed binary rules take priority over everything else.
		if get_binary(left, right, result) is not None:
			return 'binary'

		# Coordination
		# X = X CONJ X
		if left == 'conj' or (result.endswith('[conj]') and not '[conj]' in right):
			if right == 'conj\\conj':
				return 'fa.b'
			return 'conj1'
		elif 'conj' in source[1].rule or '[conj]' in right:
			# The right child is itself (part of) a coordination. Check for
			# the ordinary combinators first, then fall back to 'conj2'.
			if category.compare(left, right):
				return 'conj2'
			if category.compare(category.divide(left)[2], right) and category.divide(left)[1] == '/':
				return 'fa.f'
			if category.compare(category.divide(right)[0], left) and category.divide(right)[1] is not None:
				# NOTE(review): 'and' binds tighter than 'or', so this reads
				# as A or (B and C) -- confirm that grouping is intended.
				if 'conj2' in source[1].rule or '[conj]' in right and category.compare(category.divide(right)[2], left):
					return 'fa.b'
				else:
					return 'conj1'
			if category.compare(category.divide(right)[2], left):
				return 'fa.b'
			# Same test as "Crossed composition" further down.
			if (category.compare(left_parts[2], result_parts[2]) and
					category.compare(left_parts[0], right_parts[2]) and
					category.compare(right_parts[0], result_parts[0]) and
					left_parts[1] == result_parts[1] == '/' and
					right_parts[1] == '\\'):
				return 'cc.b'
			# Same tests as forward / backward "Function composition" below.
			if (category.compare(left_parts[2], right_parts[0]) and
					category.compare(left_parts[0], result_parts[0]) and
					category.compare(right_parts[2], result_parts[2]) and
					left_parts[1] == right_parts[1] == result_parts[1] == '/'):
				return 'fc.f'
			if (category.compare(left_parts[2], result_parts[2]) and
					category.compare(left_parts[0], right_parts[2]) and
					category.compare(right_parts[0], result_parts[0]) and
					left_parts[1] == right_parts[1] == result_parts[1] == '\\'):
				return 'fc.b'
			if category.compare(result, left):
				if '[conj]' in result:
					return 'conj2'
				raw_right = right
				if '[conj]' in right:
					# Drops the last six characters, i.e. len('[conj]');
					# this assumes the marker is a suffix of ``right``.
					raw_right = right[:-6]
				if category.compare(result, raw_right):
					return 'conj2'
				# Otherwise fall through to the generic checks below.
			else:
				return 'conj2'
		elif 'conj1' in source[0].rule or '[conj]' in left:
			return 'conj2'
		# consider conj3, to handle , separated lists

		# Function application
		# X = X/Y + Y
		if (left_parts[1] == '/' and
		    category.compare(left_parts[2], right) and
		    category.compare(left_parts[0], result)):
			return 'fa.f'
		# X = Y + X\Y
		if (right_parts[1] == '\\' and
		    category.compare(right_parts[2], left) and
		    category.compare(right_parts[0], result)):
			return 'fa.b'

		# Function composition
		# X/Z = X/Y + Y/Z
		if (category.compare(left_parts[2], right_parts[0]) and
		    category.compare(left_parts[0], result_parts[0]) and
		    category.compare(right_parts[2], result_parts[2]) and
		    left_parts[1] == right_parts[1] == result_parts[1] == '/'):
			return 'fc.f'
		# X\Z = Y\Z + X\Y
		if (category.compare(left_parts[2], result_parts[2]) and
		    category.compare(left_parts[0], right_parts[2]) and
		    category.compare(right_parts[0], result_parts[0]) and
		    left_parts[1] == right_parts[1] == result_parts[1] == '\\'):
			return 'fc.b'

		# Crossed composition
		# X/Z = Y/Z + X\Y
		# For example:
		# (S\NP)/(S\NP) = (S\NP)/(S\NP) + (S\NP)\(S\NP)
		if (category.compare(left_parts[2], result_parts[2]) and
		    category.compare(left_parts[0], right_parts[2]) and
		    category.compare(right_parts[0], result_parts[0]) and
		    left_parts[1] == result_parts[1] == '/' and
		    right_parts[1] == '\\'):
			return 'cc.b'
		# Not implemented (no check exists for this schema):
		# Z\X = Z/Y + Y\X
		# ((S\NP)/S)/(S\NP) = ((S\NP)/S)/(S\NP) + (S\NP)\(S\NP)

		# Backward crossed substitution
		# X/Z = B/Z + (X\B)/Z
		if (left_parts[1] == right_parts[1] == result_parts[1] == '/' and
		    category.compare(left_parts[2], result_parts[2]) and
		    category.compare(right_parts[2], result_parts[2])):
			sub_parts = category.divide(right_parts[0])
			if (category.compare(sub_parts[0], result_parts[0]) and
			    category.compare(sub_parts[2], left_parts[0]) and
			    sub_parts[1] != left_parts[1]):
				return 'bs.f'
		# X\Z = (X/B)\Z + B\Z
		if (left_parts[1] == right_parts[1] == result_parts[1] == '\\' and
		    category.compare(left_parts[2], result_parts[2]) and
		    category.compare(right_parts[2], result_parts[2])):
			sub_parts = category.divide(left_parts[0])
			# NOTE(review): this case uses plain == where the 'bs.f' case
			# above uses category.compare -- confirm the difference is intended.
			if (sub_parts[0] == result_parts[0] and
			    sub_parts[2] == right_parts[0] and
			    sub_parts[1] != right_parts[1]):
				return 'bs.b'
		# There are restrictions on what B can be, but since this is a parse, and
		# all other options have been exhausted, this must be what is going on

		# Uncomment to see what is misc:
###	if left == result and '/' not in right and '\\' not in right:
###		pass
###	elif right == result and '/' not in left and '\\' not in left:
###		pass
###	elif '[conj]' in left or '[conj]' in right or '[conj]' in result:
###		pass
###	else:
###		print 'misc rule:', left, right, result
###		print ' ', left_parts
###		print ' ', right_parts
###		print ' ', result_parts
		# Last resort: a complex result whose [0] part equals the right
		# child's category is labelled 'conj1'.
		if category.divide(result)[0] == right and category.divide(result)[1] is not None:
			return 'conj1'
	return 'misc'