예제 #1
0
def convert(source, argv=None, log=sys.stdout):
    """Recursively copy ``source`` into a new ``trees.PTB_Tree``.

    Label selection, in order of application:
      * a category containing a backslash or a slash is labelled "VP";
      * any other category is passed through
        ``category.strip_square_brackets``;
      * if ``source.word`` is not None, the word and POS are copied and the
        POS replaces whatever label was chosen above.

    Every entry of ``source.subtrees`` is converted recursively (without
    forwarding ``argv`` or ``log``) and appended to the new node.

    Returns the new tree when ``argv`` is None, otherwise the tuple
    ``(True, tree, None)``. ``log`` is accepted but not used in this function.
    """
    node = trees.PTB_Tree()

    cat = source.category
    if '\\' not in cat and '/' not in cat:
        node.label = category.strip_square_brackets(cat)
    else:
        # Any slashed (functor) category collapses to a plain VP label.
        node.label = "VP"

    if source.word is not None:
        # Leaf: the POS tag doubles as the node label.
        node.word, node.pos = source.word, source.pos
        node.label = node.pos

    node.subtrees.extend(convert(child) for child in source.subtrees)

    return node if argv is None else (True, node, None)
예제 #2
0
def convert(source, argv=None, log=sys.stdout):
    """Build a ``trees.PTB_Tree`` that mirrors the structure of ``source``.

    The label is "VP" when ``source.category`` contains a slash in either
    direction, otherwise the category with square brackets stripped. For
    nodes carrying a word, the word and POS are copied and the POS overrides
    the label. Children are converted recursively with default arguments.

    Returns:
        The converted tree if ``argv`` is None; otherwise the 3-tuple
        ``(True, tree, None)``. The ``log`` argument is not used here.
    """
    tree = trees.PTB_Tree()

    # Category-based label first; it may be overridden for leaves below.
    is_slashed = '\\' in source.category or '/' in source.category
    tree.label = (
        "VP" if is_slashed
        else category.strip_square_brackets(source.category)
    )

    if source.word is not None:
        tree.word = source.word
        tree.pos = source.pos
        tree.label = source.pos

    for child in source.subtrees:
        tree.subtrees.append(convert(child))

    if argv is not None:
        return True, tree, None
    return tree
def fallback_schema(cat):
    """Assemble a default list of schema strings for the category ``cat``.

    The list starts with "{(TEMP 0)}". While ``cat`` still contains a slash,
    it is split with ``category.divide``: "(NP 0 1)" is appended when the
    second element of the split is "/", otherwise "(NP 1 0)", and processing
    continues with the first element of the split.

    After each split the remaining category (or, failing that, its
    bracket-stripped form) is looked up in the module-level ``markup_info``.
    If an entry exists and the second line of that entry contains no slash,
    all lines after the first are appended and the list is returned at once.
    """
    schema = ["{(TEMP 0)}"]
    while "/" in cat or "\\" in cat:
        pieces = category.divide(cat)
        schema.append("(NP 0 1)" if pieces[1] == "/" else "(NP 1 0)")
        cat = pieces[0]

        # Prefer the exact category; fall back to the bracket-free spelling.
        key = cat if cat in markup_info else category.strip_square_brackets(cat)
        if key not in markup_info:
            continue

        tail = markup_info[key][1:]
        first = tail[0]
        if not ("/" in first or "\\" in first):
            schema.extend(tail)
            break
    return schema
예제 #4
0
def get_unary(start_cat, end_cat, markedup=None):
    """Find the rules for a unary step from ``start_cat`` to ``end_cat``.

    ``UNARIES`` is scanned in order. An entry matches when
    ``category.compare`` accepts both ``start_cat`` against the entry's first
    element and ``end_cat`` against the entry's second element with braces
    stripped. For the first match that can be resolved:

      * the entry's fifth element is returned when it is non-empty;
      * otherwise, with ``markedup`` of None, an empty list is returned;
      * otherwise ``markedup`` is consulted for the end category, then for
        its bracket-stripped form, and the matching value without its first
        item is returned.

    A matching entry that has no rules and no ``markedup`` hit does not end
    the search; scanning continues with the remaining entries.

    Returns:
        The rules as described above, or None when nothing resolves.
    """
    # Note: PP_qus - for questions only, ignored for now
    for unary in UNARIES:
        # Only fields 0, 1 and 4 of an entry are needed here.
        start = unary[0]
        end = category.strip_braces(unary[1])
        rules = unary[4]

        if not category.compare(start_cat, start):
            continue
        if not category.compare(end_cat, end):
            continue

        if rules:
            return rules
        if markedup is None:
            return []
        if end in markedup:
            return markedup[end][1:]
        end_no_brac = category.strip_square_brackets(end)
        if end_no_brac in markedup:
            return markedup[end_no_brac][1:]
    return None
예제 #5
0
def get_unary(start_cat, end_cat, markedup=None):
    """Return the rules for the unary change ``start_cat`` -> ``end_cat``.

    Each entry of ``UNARIES`` is tried in order. Its first element is
    compared with ``start_cat`` and its second element, with braces removed,
    with ``end_cat`` (both via ``category.compare``). On a match:

      * a non-empty fifth element of the entry is returned directly;
      * if it is empty and ``markedup`` is None, ``[]`` is returned;
      * if it is empty and ``markedup`` is given, the value stored under the
        end category (or under its bracket-stripped form) is returned
        without its first item; when neither key exists the search moves on
        to the next entry.

    Returns:
        The resolved rules, or None if no entry of ``UNARIES`` resolves.
    """
    # Note: PP_qus - for questions only, ignored for now
    for unary in UNARIES:
        # Fields 2 and 3 of an entry are not needed for rule lookup.
        start = unary[0]
        end = category.strip_braces(unary[1])
        rules = unary[4]

        is_match = (
            category.compare(start_cat, start)
            and category.compare(end_cat, end)
        )
        if not is_match:
            continue

        if rules:
            return rules
        if markedup is None:
            return []
        if end in markedup:
            return markedup[end][1:]
        end_no_brac = category.strip_square_brackets(end)
        if end_no_brac in markedup:
            return markedup[end_no_brac][1:]
    return None
def apply_markup(source, markup, top=True):
    """Combine the schemas of ``source``'s subtrees according to its rule.

    The subtrees are processed first (recursively), then ``source.rule``
    selects how the child results are combined. The combined result is
    returned.

    Parameters:
        source: tree node; this function reads its ``rule``, ``category``,
            ``subtrees``, ``pos`` and ``word`` attributes and calls
            ``all_word_yield()`` on it.
        markup: only forwarded to the recursive calls. The lookups in this
            function use the module-level ``markup_info`` instead.
            NOTE(review): presumably both refer to the same table - confirm.
        top: True for the outermost call; recursive calls pass False. It is
            only consulted to suppress the PRN wrapping at the top node.

    NOTE(review): ``result`` stays None when the combinator matches none of
    the branches below, or when a "misc" node does not have exactly two
    subtrees. In that case the ``result.PTB_tree()`` call near the end
    raises AttributeError.

    NOTE: the ``print >> stream, ...`` statements are Python 2 syntax.
    """
    # Declared global but not assigned anywhere in this function.
    global contains_bs
    # Bottom up, so get the results from below
    children = []
    for subtree in source.subtrees:
        children.append(apply_markup(subtree, markup, False))
    combinator = source.rule
    result = None
    verbose_print("using %s combiantor rule" % combinator)
    for child in children:
        verbose_print("%s" % child.PTB_tree())
        verbose_print(child.__repr__())
    if combinator == "lex" or combinator == "type":
        # Look the category up as-is first, then without square brackets.
        source_category = source.category
        if source_category not in markup_info:
            source_category = category.strip_square_brackets(source.category)
        schema_text = []
        if source_category not in markup_info:
            # Unknown category: report it on both streams and continue with
            # an empty schema text.
            print >> log_out, "Missing category:", source.category, "asked for by", combinator
            print >> sys.stderr, "Missing category:", source.category, "asked for by", combinator
        else:
            schema_text = markup_info[source_category]
        schema = markup_to_schemas(schema_text, source.category, source)
        if combinator == "lex":
            # Fill slot zero with the "(POS word)" leaf.
            result = schema.set_zero("(%s %s)" % (source.pos, source.word))
        elif combinator == "type":
            verbose_print("Type schema:")
            verbose_print(schema.__repr__())
            result = schema.tr(children[0])
    elif combinator == "conj1":
        result = children[0].conj_part1(children[1])
    elif combinator == "conj2":
        result = children[0].conj_part2(children[1])
    elif combinator == "unary":
        unary_rule = rule.get_unary(source.subtrees[0].category, source.category, markup_info)
        if unary_rule is None:
            # No known unary rule: derive one from the category itself.
            unary_rule = fallback_schema(source.category)
        schemas = markup_to_schemas(["None"] + unary_rule, source=source)
        verbose_print("Unary schema:")
        verbose_print(schemas.__repr__())
        result = children[0].special_unary(schemas)
    elif combinator in ["binary", "bs.f", "bs.b"]:
        binary_rule = rule.get_binary_for_markedup(
            source.subtrees[0].category, source.subtrees[1].category, source.category, markup_info
        )
        if binary_rule is None:
            # No known binary rule: a "(VP 0 1)" line plus the fallback schema.
            binary_rule = ["(VP 0 1)"] + fallback_schema(source.category)
        schemas = markup_to_schemas(["None"] + binary_rule, source=source)
        verbose_print("Binary schema:")
        verbose_print(schemas.__repr__())
        control = get_next_incomplete_schema(children[0], children[1])
        result = control.special_binary(children[1], schemas)
    elif combinator == "fa.f":
        # The ".f" variants take the left child as the controlling schema,
        # the ".b" variants the right child.
        control = get_next_incomplete_schema(children[0], children[1])
        result = control.fa(children[1], combinator)
    elif combinator == "fa.b":
        control = get_next_incomplete_schema(children[1], children[0])
        result = control.fa(children[0], combinator)
    elif combinator == "fc.f":
        control = get_next_incomplete_schema(children[0], children[1])
        argument = get_next_incomplete_schema(children[1], None)
        result = control.fc(argument)
    elif combinator == "fc.b":
        control = get_next_incomplete_schema(children[1], children[0])
        argument = get_next_incomplete_schema(children[0], None)
        result = control.fc(argument)
    elif combinator == "cc.b":
        control = get_next_incomplete_schema(children[0], children[1])
        result = control.back_cross(children[1])
    elif combinator == "misc":
        if len(source.subtrees) == 2:
            # Compare categories with square brackets stripped.
            cur = category.strip_square_brackets(source.category)
            left = category.strip_square_brackets(source.subtrees[0].category)
            right = category.strip_square_brackets(source.subtrees[1].category)
            if cur != left and cur != right:
                # The parent category matches neither child: report it and
                # combine the children with a fallback binary schema.
                print >> log_out, "miscing an unknown category:", source.category,
                print >> log_out, "from", source.subtrees[0].category, "and", source.subtrees[1].category
                print >> sys.stderr, "miscing an unknown category:", source.category,
                print >> sys.stderr, "from", source.subtrees[0].category, "and", source.subtrees[1].category
                binary_rule = fallback_schema(source.category)
                schemas = markup_to_schemas(["None", "(NP 0 1)"] + binary_rule, source=source)
                verbose_print("Misc Binary schema:")
                verbose_print(schemas.__repr__())
                result = children[0].special_binary(children[1], schemas)
            else:
                # check if this forms a PRN
                words = source.all_word_yield()[1].split()
                left_word = words[0]
                right_word = words[-1]
                verbose_print(left_word + " " + right_word)
                use_PRN = False
                if not top:
                    # A span delimited by matching commas, dashes or round
                    # brackets is wrapped in PRN (never at the top node).
                    if left_word == "," and right_word == ",":
                        use_PRN = True
                    elif left_word == "--" and right_word == "--":
                        use_PRN = True
                    elif left_word == "-LRB-" and right_word == "-RRB-":
                        use_PRN = True
                # The second argument is True when the parent category equals
                # the right child's category.
                result = children[0].glom(children[1], cur == right)
                if use_PRN:
                    # Relabel the glommed node as PRN and put a new level with
                    # the old label on top of it.
                    old_label = result.label
                    result.label = "PRN"
                    result.delete_on_adoption = False
                    nlevel = Schema(["(%s 0)" % old_label] + result.parent, source_node=source)
                    if old_label == "TEMP":
                        # For TEMP the new level's first line is written in braces.
                        nlevel = Schema(["{(%s 0)}" % old_label] + result.parent, source_node=source)
                    nlevel.set_zero(result)
                    nlevel.incomplete = result.incomplete
                    result = nlevel
        else:
            print >> sys.stderr, "misc combinator is not handled"
    # NOTE(review): result is still None here for unhandled combinators; the
    # call below then fails with AttributeError.
    verbose_print("resolved: %s" % result.PTB_tree())
    verbose_print(result.__repr__())
    verbose_print("")
    return result