def get_constituents_with_label(parse_dict, connective):
    """Collect candidate argument constituents for *connective* and label
    each candidate "Arg1", "Arg2", or "NULL" from the gold argument spans.

    Candidates are the classic PDTB-parser set: every child of the node
    dominating the connective that contains none of the connective's own
    leaves, plus the siblings of each node on the path from that node up
    to the root.

    Args:
        parse_dict: parsed-corpus mapping; parse_dict[DocID]["sentences"]
            [sent_index]["parsetree"] is a bracketed parse string
            (schema assumed from usage -- TODO confirm against loader).
        connective: object carrying DocID, sent_index, token_indices,
            Arg1_token_indices and Arg2_token_indices.

    Returns:
        list of Constituent objects with .connective and .label set, or
        an empty list when the sentence has no usable parse tree.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:  # unparsable sentence -> no candidates
        return []

    conn_indices = connective.token_indices
    # Node dominating the connective: parent of the single leaf, or the
    # lowest common ancestor for a multi-token connective.
    if len(conn_indices) == 1:  # like "and", "or", "so" ...
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
    else:
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)

    conn_leaves = set(syntax_tree.get_leaf_node_by_token_index(conn_index)
                      for conn_index in conn_indices)

    # Children of the connective node that share no leaf with the connective.
    constituent_nodes = [child for child in conn_node.get_children()
                         if conn_leaves.isdisjoint(child.get_leaves())]

    # Siblings of every ancestor on the path up to (excluding) the root.
    curr = conn_node
    while not curr.is_root():
        constituent_nodes.extend(syntax_tree.get_siblings(curr))
        curr = curr.up

    Arg1_leaves = set(syntax_tree.get_leaf_node_by_token_index(index)
                      for index in connective.Arg1_token_indices)
    Arg2_leaves = set(syntax_tree.get_leaf_node_by_token_index(index)
                      for index in connective.Arg2_token_indices)

    # Build a Constituent per candidate node and label it by whether its
    # leaf span is fully contained in the Arg1 or Arg2 gold span.
    constituents = []
    for node in constituent_nodes:
        cons = Constituent(syntax_tree, node)
        cons.connective = connective
        leaves = set(node.get_leaves())
        if leaves <= Arg1_leaves:
            cons.label = "Arg1"
        elif leaves <= Arg2_leaves:
            cons.label = "Arg2"
        else:
            cons.label = "NULL"
        constituents.append(cons)
    return constituents
def _get_constituents(parse_dict, connective):
    """Collect the unlabeled candidate argument constituents for *connective*.

    Same candidate-generation scheme as get_constituents_with_label (children
    of the connective's node that exclude its leaves, plus siblings of every
    ancestor up to the root), but no Arg1/Arg2/NULL labeling is applied --
    intended for inference time, where gold spans are unavailable.

    Args:
        parse_dict: parsed-corpus mapping; parse_dict[DocID]["sentences"]
            [sent_index]["parsetree"] is a bracketed parse string
            (schema assumed from usage -- TODO confirm against loader).
        connective: object carrying DocID, sent_index, and token_indices.

    Returns:
        list of Constituent objects with .connective set, or an empty list
        when the sentence has no usable parse tree.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:  # unparsable sentence -> no candidates
        return []

    conn_indices = connective.token_indices
    # Node dominating the connective: parent of the single leaf, or the
    # lowest common ancestor for a multi-token connective.
    if len(conn_indices) == 1:  # like "and", "or", "so" ...
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
    else:
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)

    conn_leaves = set(syntax_tree.get_leaf_node_by_token_index(conn_index)
                      for conn_index in conn_indices)

    # Children of the connective node that share no leaf with the connective.
    constituent_nodes = [child for child in conn_node.get_children()
                         if conn_leaves.isdisjoint(child.get_leaves())]

    # Siblings of every ancestor on the path up to (excluding) the root.
    curr = conn_node
    while not curr.is_root():
        constituent_nodes.extend(syntax_tree.get_siblings(curr))
        curr = curr.up

    # Wrap each candidate node in a Constituent tied back to the connective.
    constituents = []
    for node in constituent_nodes:
        cons = Constituent(syntax_tree, node)
        cons.connective = connective
        constituents.append(cons)
    return constituents
def get_constituents_with_label2(parse_dict, connective):
    """Collect and label candidate argument constituents for *connective*.

    NOTE(review): this is a functional duplicate of
    get_constituents_with_label -- consider delegating to it and removing
    this copy once callers are audited.

    Candidates are the children of the node dominating the connective that
    contain none of its leaves, plus the siblings of each ancestor up to the
    root; each is labeled "Arg1"/"Arg2"/"NULL" by containment in the gold
    argument spans.

    Args:
        parse_dict: parsed-corpus mapping; parse_dict[DocID]["sentences"]
            [sent_index]["parsetree"] is a bracketed parse string
            (schema assumed from usage -- TODO confirm against loader).
        connective: object carrying DocID, sent_index, token_indices,
            Arg1_token_indices and Arg2_token_indices.

    Returns:
        list of labeled Constituent objects, or an empty list when the
        sentence has no usable parse tree.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:  # unparsable sentence -> no candidates
        return []

    conn_indices = connective.token_indices
    # Node dominating the connective: parent of the single leaf, or the
    # lowest common ancestor for a multi-token connective.
    if len(conn_indices) == 1:  # like "and", "or", "so" ...
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
    else:
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)

    conn_leaves = set(syntax_tree.get_leaf_node_by_token_index(conn_index)
                      for conn_index in conn_indices)

    # Children of the connective node that share no leaf with the connective.
    constituent_nodes = [child for child in conn_node.get_children()
                         if conn_leaves.isdisjoint(child.get_leaves())]

    # Siblings of every ancestor on the path up to (excluding) the root.
    curr = conn_node
    while not curr.is_root():
        constituent_nodes.extend(syntax_tree.get_siblings(curr))
        curr = curr.up

    Arg1_leaves = set(syntax_tree.get_leaf_node_by_token_index(index)
                      for index in connective.Arg1_token_indices)
    Arg2_leaves = set(syntax_tree.get_leaf_node_by_token_index(index)
                      for index in connective.Arg2_token_indices)

    # Label each candidate by span containment in the gold argument spans.
    constituents = []
    for node in constituent_nodes:
        cons = Constituent(syntax_tree, node)
        cons.connective = connective
        leaves = set(node.get_leaves())
        if leaves <= Arg1_leaves:
            cons.label = "Arg1"
        elif leaves <= Arg2_leaves:
            cons.label = "Arg2"
        else:
            cons.label = "NULL"
        constituents.append(cons)
    return constituents
def ssArgumentExt(inputFilenamePath):
    # Extract candidate argument constituents for each predicted connective
    # (one per entry of the module-level `observedArray`), using connective
    # metadata from the module-level `bigDiction` table and parse trees from
    # <inputFilenamePath>/parses.json.
    #
    # NOTE(review): relies on module globals `observedArray` and `bigDiction`
    # and on `codecs`, `json`, `Syntax_tree`, `Constituent`, `Connective`
    # being in scope -- none are defined in this function.
    # NOTE(review): Python 2 code (print statements).
    parse_file = codecs.open(inputFilenamePath+'/parses.json', encoding='utf8');
    en_parse_dict = json.load(parse_file);
    # NOTE(review): parse_file is never closed -- consider a with-block.
    i = 0;  # parallel index into bigDiction, advanced once per prediction
    for prediction in observedArray:
        filename = bigDiction[i][2];
        # NOTE(review): sentence number is read from the NEXT row (i+1) and
        # offset by one -- presumably matching bigDiction's layout; confirm.
        sentenceNumber = int(bigDiction[i+1][3]) + 1;
        connWordID = int(bigDiction[i][4]);
        print "ConnWordID: " + str(connWordID);
        parse_tree = en_parse_dict[filename]["sentences"][sentenceNumber]["parsetree"].strip();
        syntax_tree = Syntax_tree(parse_tree)
        # NOTE(review): an unparsable sentence aborts the WHOLE loop, not just
        # this prediction -- a `continue` may have been intended here.
        if syntax_tree.tree == None:
            return []
        #Get Connective Indices
        conn_indices = [connWordID];
        constituent_nodes = [];
        # Node dominating the connective: parent of the single leaf, or the
        # lowest common ancestor for a multi-token connective (the else branch
        # is unreachable here since conn_indices always has one element).
        if len(conn_indices) == 1:# like and or so...
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
        else:
            conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)
        conn_leaves = set([syntax_tree.get_leaf_node_by_token_index(conn_index) for conn_index in conn_indices])
        children = conn_node.get_children()
        # Keep children of the connective node that contain none of its leaves.
        for child in children:
            leaves = set(child.get_leaves())
            if conn_leaves & leaves == set([]):
                constituent_nodes.append(child)
        # Plus the siblings of every ancestor on the path up to the root.
        curr = conn_node
        while not curr.is_root():
            constituent_nodes.extend(syntax_tree.get_siblings(curr))
            curr = curr.up
        # obtain the Constituent object according to the node.
        # NOTE(review): `constituents` is re-created every iteration, so the
        # size printed below covers only the current prediction, and nothing
        # is returned or accumulated -- confirm this is intentional.
        constituents = []
        for node in constituent_nodes:
            cons = Constituent(syntax_tree, node)
            #print "Object Type: " + str(cons.type());
            #print "Object Dir: " + str(cons.dir());
            #print "Object id: " + str(cons.id());
            #print "cons: " + str(cons.connective);
            connective = Connective(filename, sentenceNumber, conn_indices, "text");
            cons.connective = connective
            constituents.append(cons)
        i = i + 1;
        print "Connective ID:" + str(connWordID);
        print "Size of Observed Array: " + str(len(observedArray));
        print "Size of Constituents Array: " + str(len(constituents));