def convert_node_to_mathml(ccg_node, sem_tree, tokens):
    """
    Recursively build the MathML string for `ccg_node` and its descendants.

    A node without children is rendered from the surface form of the token
    whose id is stored in its 'terminal' attribute. A node with children is
    rendered from the concatenated renderings of its children together with
    its 'rule' attribute. When `sem_tree` is given and the module-level flag
    kDisplaySemantics is truthy, the 'sem' attribute of the node in
    `sem_tree` with the same id is added on top of the result.

    Args:
        ccg_node: node supporting .get(), len(), indexing and iteration over
            its children (presumably an XML element -- TODO confirm).
        sem_tree: container searched with find_node_by_id for the semantic
            node matching this node's 'id'; None disables semantics.
        tokens: container searched with find_node_by_id for terminal tokens.

    Returns:
        The MathML markup of the subtree as a string.
    """
    category_mathml = get_category_mathml(ccg_node.get('category').strip())
    n_children = len(ccg_node)
    mathml_str = ''

    if n_children == 0:
        # Leaf: category over the token's surface form.
        token = find_node_by_id(ccg_node.get('terminal'), tokens)
        surf_mathml = get_surface_mathml(token.get('surf'))
        mathml_str = get_fraction_mathml(category_mathml, surf_mathml, '0')
    elif n_children == 1:
        # Unary node: category over its single rendered child.
        only_child_mathml = convert_node_to_mathml(
            ccg_node[0], sem_tree, tokens)
        mathml_str = get_fraction_mathml(
            category_mathml, only_child_mathml, '3', ccg_node.get('rule'))
    else:
        # Two or more children: render each one and place them side by side.
        children_mathml = ''.join(
            convert_node_to_mathml(child, sem_tree, tokens)
            for child in ccg_node)
        mathml_str = get_fraction_mathml(
            category_mathml, children_mathml, '3', ccg_node.get('rule'))

    if sem_tree is not None and kDisplaySemantics:
        sem_node = find_node_by_id(ccg_node.get('id'), sem_tree)
        semantics_mathml = get_semantics_mathml(sem_node.get('sem'))
        mathml_str = get_fraction_mathml(semantics_mathml, mathml_str, '0')
    return mathml_str
def convert_node_to_mathml(ccg_node, sem_tree, tokens):
    """
    Recursively build the MathML string for `ccg_node` and its descendants.

    Terminal nodes (no children) are rendered from the token referenced by
    their 'terminal' attribute: the category is stacked over the token's
    part-of-speech information ('pos', optional 'entity', and 'pos1'..'pos3'
    when present and not '*'), which in turn is stacked over the surface
    form. Tokens whose 'pos' is '.' skip the part-of-speech layer.
    Non-terminal nodes are rendered from the concatenated renderings of
    their children together with the node's 'rule' attribute.

    When `sem_tree` is given and the module-level flag kDisplaySemantics is
    truthy, the 'sem' attribute of the node in `sem_tree` with the same id
    is added on top of the result.

    Args:
        ccg_node: node supporting .get(), len() and iteration over its
            children (presumably an XML element -- TODO confirm).
        sem_tree: container searched with find_node_by_id for the semantic
            node matching this node's 'id'; None disables semantics.
        tokens: container searched with find_node_by_id for terminal tokens.

    Returns:
        The MathML markup of the subtree as a string.
    """
    category_mathml = get_category_mathml(ccg_node.get('category').strip())

    if len(ccg_node) == 0:
        token = find_node_by_id(ccg_node.get('terminal'), tokens)
        surf_mathml = get_surface_mathml(token.get('surf'))
        pos = token.get('pos')
        pos_mathml = get_pos_mathml(pos)

        entity = token.get('entity')
        if entity is not None:
            pos_mathml += ("<mtext>,</mtext><mspace width='.1em'/>"
                           + get_entity_mathml(entity))

        # Finer-grained POS attributes; '*' marks an unspecified value.
        for attr in ('pos1', 'pos2', 'pos3'):
            sub_pos = token.get(attr)
            if sub_pos is not None and sub_pos != '*':
                pos_mathml += "<mspace width='.1em'/>" + get_pos_mathml(sub_pos)

        if pos == '.':
            # This POS value gets no part-of-speech layer.
            mathml_str = get_fraction_mathml(category_mathml, surf_mathml, '0')
        else:
            mathml_pos_str = get_fraction_mathml(
                category_mathml, pos_mathml, '0')
            mathml_str = get_fraction_mathml(mathml_pos_str, surf_mathml, '0')
    else:
        # One or more children: a single child is just the one-element case
        # of the concatenation, so both cases share this branch.
        mathml_str_children = ''.join(
            convert_node_to_mathml(child, sem_tree, tokens)
            for child in ccg_node)
        rule = ccg_node.get('rule')
        mathml_str = get_fraction_mathml(
            category_mathml, mathml_str_children, '1', rule)

    if sem_tree is not None and kDisplaySemantics:
        sem_node = find_node_by_id(ccg_node.get('id'), sem_tree)
        semantics_mathml = get_semantics_mathml(sem_node.get('sem'))
        mathml_str = get_fraction_mathml(semantics_mathml, mathml_str, '0')
    return mathml_str
def convert_node_to_latex(ccg_node, sem_tree, tokens):
    """
    Recursively build the LaTeX string for `ccg_node` and its descendants.

    A node without children is rendered from the surface form of the token
    whose id is stored in its 'terminal' attribute; a surface form of '.'
    is rendered as a bare '.'. A node with children is rendered from its
    children's LaTeX (joined with " & " when there are several) together
    with its 'rule' attribute. When `sem_tree` is given and the module-level
    flag kDisplaySemantics is truthy, the 'sem' attribute of the node in
    `sem_tree` with the same id is attached via get_sem_latex.

    Args:
        ccg_node: node supporting .get(), len(), indexing and iteration over
            its children (presumably an XML element -- TODO confirm).
        sem_tree: container searched with find_node_by_id for the semantic
            node matching this node's 'id'; None disables semantics.
        tokens: container searched with find_node_by_id for terminal tokens.

    Returns:
        The LaTeX markup of the subtree as a string.
    """
    category_latex = get_category_latex(ccg_node.get('category').strip())
    n_children = len(ccg_node)
    latex_str = ''

    if n_children == 0:
        token = find_node_by_id(ccg_node.get('terminal'), tokens)
        surf = token.get('surf')
        surf_latex = get_surface_latex(surf)
        latex_str = (
            '.' if surf == '.' else get_lex_latex(surf_latex, category_latex))
    elif n_children == 1:
        only_child_latex = convert_node_to_latex(ccg_node[0], sem_tree, tokens)
        latex_str = get_fraction_latex(
            category_latex, only_child_latex, ccg_node.get('rule'))
    else:
        joined_children = ''
        for child in ccg_node:
            child_latex = convert_node_to_latex(child, sem_tree, tokens)
            # The separator is only inserted once something has accumulated.
            joined_children = (
                child_latex if joined_children == ''
                else joined_children + " & " + child_latex)
        latex_str = get_fraction_latex(
            category_latex, joined_children, ccg_node.get('rule'))

    if sem_tree is not None and kDisplaySemantics:
        sem_node = find_node_by_id(ccg_node.get('id'), sem_tree)
        semantics_latex = get_semantics_latex(sem_node.get('sem'))
        latex_str = get_sem_latex(latex_str, semantics_latex)
    return latex_str
def build_ccg_tree(ccg_xml, root_id=None):
    """
    This function re-arranges the nodes of the XML CCG tree to have
    a tree structure. It will be useful to traverse the tree.

    Starting from the node whose id is `root_id`, a deep copy of that node
    is made and copies of the nodes listed (whitespace-separated) in its
    'child' attribute are recursively appended to it.

    Args:
        ccg_xml: container of CCG nodes that supports len() and .get(), and
            that semantic_index.find_node_by_id can search (presumably an
            XML element -- TODO confirm).
        root_id: id of the node to use as the root of the returned tree.
            When None, the 'root' attribute of `ccg_xml` is used.

    Returns:
        The copied root node with its descendants nested below it, or None
        when `ccg_xml` is None or empty.
    """
    if ccg_xml is None or len(ccg_xml) == 0:
        return None
    if root_id is None:
        root_id = ccg_xml.get('root')
    # Work on a copy so that appending children does not modify ccg_xml.
    root_span = copy.deepcopy(semantic_index.find_node_by_id(root_id, ccg_xml))
    if 'child' not in root_span.attrib:
        return root_span
    for child_id in root_span.get('child').split():
        child_node = build_ccg_tree(ccg_xml, child_id)
        if child_node is not None:
            root_span.append(child_node)
    return root_span
def convert_node_to_mathml(ccg_node, sem_tree, tokens):
    """
    Recursively build the MathML string for `ccg_node` and its descendants.

    Terminal nodes (no children) are rendered from the token referenced by
    their 'terminal' attribute: the category is stacked over the token's
    part-of-speech information ('pos', optional 'entity', and 'pos1'..'pos3'
    when present and not '*'), which in turn is stacked over the surface
    form. Tokens whose 'pos' is '.' skip the part-of-speech layer.
    Non-terminal nodes are rendered from the concatenated renderings of
    their children together with the node's 'rule' attribute.

    On top of that, in this order, the following layers are added when
    available: the node's 'ETtype' attribute, its 'polarity' attribute, and
    (when `sem_tree` is given and the module-level flag kDisplaySemantics is
    truthy) the 'sem' attribute of the node in `sem_tree` with the same id.

    Args:
        ccg_node: node supporting .get(), len() and iteration over its
            children (presumably an XML element -- TODO confirm).
        sem_tree: container searched with find_node_by_id for the semantic
            node matching this node's 'id'; None disables semantics.
        tokens: container searched with find_node_by_id for terminal tokens.

    Returns:
        The MathML markup of the subtree as a string.
    """
    category_mathml = get_category_mathml(ccg_node.get('category').strip())

    # Optional annotations; missing or empty attributes produce no layer.
    et_type = ccg_node.get('ETtype')
    mathml_ETtype_str = get_ETtype_mathml(et_type) if et_type else ''
    polarity = ccg_node.get('polarity')
    mathml_polarity_str = get_polarity_mathml(polarity) if polarity else ''

    if len(ccg_node) == 0:
        # Terminal node.
        token = find_node_by_id(ccg_node.get('terminal'), tokens)
        surf_mathml = get_surface_mathml(token.get('surf'))
        pos = token.get('pos')
        pos_mathml = get_pos_mathml(pos)

        entity = token.get('entity')
        if entity is not None:
            pos_mathml += ("<mtext>,</mtext><mspace width='.1em'/>"
                           + get_entity_mathml(entity))

        # Finer-grained POS attributes; '*' marks an unspecified value.
        for attr in ('pos1', 'pos2', 'pos3'):
            sub_pos = token.get(attr)
            if sub_pos is not None and sub_pos != '*':
                pos_mathml += "<mspace width='.1em'/>" + get_pos_mathml(sub_pos)

        if pos == '.':
            # Punctuation: no part-of-speech layer.
            mathml_str = get_fraction_mathml(category_mathml, surf_mathml, '0')
        else:
            mathml_pos_str = get_fraction_mathml(
                category_mathml, pos_mathml, '0')
            mathml_str = get_fraction_mathml(mathml_pos_str, surf_mathml, '0')
    else:
        # Non-terminal node with one or more children: a single child is
        # just the one-element case of the concatenation.
        mathml_str_children = ''.join(
            convert_node_to_mathml(child, sem_tree, tokens)
            for child in ccg_node)
        rule = ccg_node.get('rule')
        mathml_str = get_fraction_mathml(
            category_mathml, mathml_str_children, '3', rule)

    # ETtype and polarity are layered the same way for every kind of node,
    # so they are applied once here rather than inside each branch.
    if mathml_ETtype_str:
        mathml_str = get_fraction_mathml(mathml_ETtype_str, mathml_str, '0')
    if mathml_polarity_str:
        mathml_str = get_fraction_mathml(mathml_polarity_str, mathml_str, '0')

    if sem_tree is not None and kDisplaySemantics:
        sem_node = find_node_by_id(ccg_node.get('id'), sem_tree)
        semantics_mathml = get_semantics_mathml(sem_node.get('sem'))
        mathml_str = get_fraction_mathml(semantics_mathml, mathml_str, '0')
    return mathml_str