def convert(source, argv=None, log=sys.stdout):
    """Build a ``trees.PTB_Tree`` that mirrors the structure of ``source``.

    Labelling:
      * a category containing ``/`` or ``\\`` is labelled ``"VP"``;
      * any other category is labelled with its square brackets stripped;
      * a node that carries a word copies ``word`` and ``pos`` across and
        uses the POS tag as its label, overriding the two rules above.

    Args:
        source: node exposing ``category``, ``word``, ``pos`` and
            ``subtrees``.
        argv: only inspected to choose the return shape (see Returns).
        log: not used in this function.

    Returns:
        The converted tree when ``argv`` is None, otherwise the tuple
        ``(True, tree, None)``.
    """
    node = trees.PTB_Tree()

    source_cat = source.category
    if '\\' in source_cat or '/' in source_cat:
        node.label = "VP"
    else:
        node.label = category.strip_square_brackets(source.category)

    if source.word is not None:
        node.word = source.word
        node.pos = source.pos
        node.label = source.pos

    # Children are converted with the default arguments, so they always
    # come back as bare trees regardless of ``argv``.
    for child in source.subtrees:
        node.subtrees.append(convert(child))

    if argv is None:
        return node
    return True, node, None
def fallback_schema(cat):
    """Assemble a default list of schema rule strings for ``cat``.

    The list starts with ``"{(TEMP 0)}"``. While the category still contains
    a slash, it is split with ``category.divide``; each split adds
    ``"(NP 0 1)"`` when the second element of the split is ``"/"`` and
    ``"(NP 1 0)"`` otherwise, and the first element of the split becomes the
    category for the next round.

    The remaining category is then looked up in ``markup_info`` (as is, or
    with square brackets stripped). If an entry exists and the first line
    after its header contains no slash, all lines after the header are
    appended to the rules.

    Returns:
        The list of rule strings.
    """
    schema_rules = ["{(TEMP 0)}"]

    remaining = cat
    while "/" in remaining or "\\" in remaining:
        pieces = category.divide(remaining)
        schema_rules.append("(NP 0 1)" if pieces[1] == "/" else "(NP 1 0)")
        remaining = pieces[0]

    lookup_key = remaining
    if lookup_key not in markup_info:
        lookup_key = category.strip_square_brackets(remaining)

    if lookup_key in markup_info:
        # Skip the first element of the entry; only the lines after it are
        # candidate rules.
        extra_lines = markup_info[lookup_key][1:]
        first_line = extra_lines[0]
        if "/" not in first_line and "\\" not in first_line:
            schema_rules += extra_lines

    return schema_rules
def get_unary(start_cat, end_cat, markedup=None):
    """Look up the rule list for a unary step from ``start_cat`` to ``end_cat``.

    ``UNARIES`` is scanned in order. For the first entry whose start category
    and brace-stripped end category both match (per ``category.compare``):

      * the entry's own rules (index 4) are returned when non-empty;
      * otherwise, with no ``markedup`` table, an empty list is returned;
      * otherwise the lines after the header of the ``markedup`` entry for
        the end category (as is, or with square brackets stripped) are
        returned when such an entry exists.

    If a matching entry yields nothing from ``markedup``, scanning continues
    with the next entry.

    Returns:
        A list of rules, or None when no entry produced a result.
    """
    # Note: PP_qus - for questions only, ignored for now
    # NOTE(review): the original layout was collapsed onto one line; the
    # ``else: return []`` is assumed to pair with the ``markedup is not None``
    # test -- confirm against the pre-collapse source.
    for entry in UNARIES:
        # Indices 2 and 3 of each entry are not needed here.
        entry_start = entry[0]
        entry_end = category.strip_braces(entry[1])
        entry_rules = entry[4]

        if not category.compare(start_cat, entry_start):
            continue
        if not category.compare(end_cat, entry_end):
            continue

        if len(entry_rules) > 0:
            return entry_rules
        if markedup is None:
            return []

        if entry_end in markedup:
            return markedup[entry_end][1:]
        bracketless_end = category.strip_square_brackets(entry_end)
        if bracketless_end in markedup:
            return markedup[bracketless_end][1:]

    return None
def apply_markup(source, markup, top=True):
    """Build the schema result for ``source`` from its subtrees' results.

    Every subtree is processed first (with ``top=False``); the node's own
    ``source.rule`` value then selects how the child results are combined.

    Args:
        source: node exposing ``rule``, ``category`` and ``subtrees`` and,
            depending on the rule, ``pos``, ``word`` and
            ``all_word_yield()``.
        markup: only forwarded to the recursive calls; the lookups made in
            this function go through the module-level ``markup_info``.
        top: False for recursive calls. The "PRN" relabelling in the
            ``"misc"`` case is only considered when this is False.

    Returns:
        The object produced by the selected combination step.
    """
    global contains_bs

    # Bottom up, so get the results from below
    children = [apply_markup(subtree, markup, False) for subtree in source.subtrees]

    combinator = source.rule
    result = None

    verbose_print("using %s combiantor rule" % combinator)
    for child in children:
        verbose_print("%s" % child.PTB_tree())
        verbose_print(child.__repr__())

    if combinator in ("lex", "type"):
        lookup_key = source.category
        if lookup_key not in markup_info:
            lookup_key = category.strip_square_brackets(source.category)

        if lookup_key in markup_info:
            schema_text = markup_info[lookup_key]
        else:
            # Report the gap on both streams and carry on with no markup.
            schema_text = []
            for stream in (log_out, sys.stderr):
                print >> stream, "Missing category:", source.category, "asked for by", combinator

        schema = markup_to_schemas(schema_text, source.category, source)
        if combinator == "lex":
            result = schema.set_zero("(%s %s)" % (source.pos, source.word))
        else:
            verbose_print("Type schema:")
            verbose_print(schema.__repr__())
            result = schema.tr(children[0])

    elif combinator == "conj1":
        result = children[0].conj_part1(children[1])

    elif combinator == "conj2":
        result = children[0].conj_part2(children[1])

    elif combinator == "unary":
        unary_rule = rule.get_unary(
            source.subtrees[0].category, source.category, markup_info
        )
        if unary_rule is None:
            unary_rule = fallback_schema(source.category)
        schemas = markup_to_schemas(["None"] + unary_rule, source=source)
        verbose_print("Unary schema:")
        verbose_print(schemas.__repr__())
        result = children[0].special_unary(schemas)

    elif combinator in ("binary", "bs.f", "bs.b"):
        binary_rule = rule.get_binary_for_markedup(
            source.subtrees[0].category,
            source.subtrees[1].category,
            source.category,
            markup_info,
        )
        if binary_rule is None:
            binary_rule = ["(VP 0 1)"] + fallback_schema(source.category)
        schemas = markup_to_schemas(["None"] + binary_rule, source=source)
        verbose_print("Binary schema:")
        verbose_print(schemas.__repr__())
        control = get_next_incomplete_schema(children[0], children[1])
        result = control.special_binary(children[1], schemas)

    elif combinator == "fa.f":
        control = get_next_incomplete_schema(children[0], children[1])
        result = control.fa(children[1], combinator)

    elif combinator == "fa.b":
        control = get_next_incomplete_schema(children[1], children[0])
        result = control.fa(children[0], combinator)

    elif combinator == "fc.f":
        control = get_next_incomplete_schema(children[0], children[1])
        argument = get_next_incomplete_schema(children[1], None)
        result = control.fc(argument)

    elif combinator == "fc.b":
        control = get_next_incomplete_schema(children[1], children[0])
        argument = get_next_incomplete_schema(children[0], None)
        result = control.fc(argument)

    elif combinator == "cc.b":
        control = get_next_incomplete_schema(children[0], children[1])
        result = control.back_cross(children[1])

    elif combinator == "misc":
        # NOTE(review): the original layout was collapsed; the final
        # ``else`` (the "not handled" message) is assumed to pair with this
        # two-subtree test -- confirm against the pre-collapse source.
        if len(source.subtrees) == 2:
            cur = category.strip_square_brackets(source.category)
            left = category.strip_square_brackets(source.subtrees[0].category)
            right = category.strip_square_brackets(source.subtrees[1].category)

            if cur != left and cur != right:
                # The trailing comma on the first print keeps both halves
                # of the message on one output line.
                for stream in (log_out, sys.stderr):
                    print >> stream, "miscing an unknown category:", source.category,
                    print >> stream, "from", source.subtrees[0].category, "and", source.subtrees[1].category
                binary_rule = fallback_schema(source.category)
                schemas = markup_to_schemas(
                    ["None", "(NP 0 1)"] + binary_rule, source=source
                )
                verbose_print("Misc Binary schema:")
                verbose_print(schemas.__repr__())
                result = children[0].special_binary(children[1], schemas)
            else:
                # check if this forms a PRN
                edge_words = source.all_word_yield()[1].split()
                left_word = edge_words[0]
                right_word = edge_words[-1]
                verbose_print(left_word + " " + right_word)
                use_PRN = (not top) and (left_word, right_word) in (
                    (",", ","),
                    ("--", "--"),
                    ("-LRB-", "-RRB-"),
                )

                result = children[0].glom(children[1], cur == right)
                if use_PRN:
                    # Relabel the glommed result as PRN and wrap it in a
                    # new level that carries the previous label.
                    old_label = result.label
                    result.label = "PRN"
                    result.delete_on_adoption = False
                    nlevel = Schema(
                        ["(%s 0)" % old_label] + result.parent, source_node=source
                    )
                    if old_label == "TEMP":
                        nlevel = Schema(
                            ["{(%s 0)}" % old_label] + result.parent,
                            source_node=source,
                        )
                    nlevel.set_zero(result)
                    nlevel.incomplete = result.incomplete
                    result = nlevel
        else:
            print >> sys.stderr, "misc combinator is not handled"

    # NOTE(review): ``result`` is still None here if no branch above
    # assigned it; the next line dereferences it unconditionally.
    verbose_print("resolved: %s" % result.PTB_tree())
    verbose_print(result.__repr__())
    verbose_print("")
    return result