def main(treeFile):
    """Print the productions found for every interesting label in treeFile.

    Every subtree of every tree read via ma_util.readPenn is handed to
    ProdEvaluator.getInterestingLabel.  For each subtree that receives a
    label, the production "parent -> left right" is recorded under that
    label; the subtree itself is printed when its production is one of the
    two hard-coded ones and/or when it has fewer than five leaves.
    Afterwards all collected productions are printed, grouped by label.

    @param treeFile Passed unchanged to ma_util.readPenn
    """
    # Productions whose subtrees are always printed.
    wanted = set(('4 -> 1 3', '0 -> 1 3'))
    prodsByLabel = {}
    evaluator = evaluate_productions.ProdEvaluator(
        True, ma_util.GRANULARITY_FINE)

    # Single-argument print(...) is used throughout; under Python 2 it is a
    # print statement with a parenthesised expression, so output is unchanged.
    for tree in ma_util.readPenn(treeFile):
        for subTree in ma_util.walkTree(tree):
            label = evaluator.getInterestingLabel(subTree)
            if not label:
                continue

            parts = (subTree.node, subTree[0].node, subTree[1].node)
            production = '%s -> %s %s' % parts
            prodsByLabel.setdefault(label, set()).add(production)

            if production in wanted:
                print("DESIRED: %s" % label)
                print(subTree.pprint().encode('utf-8'))

            # don't print everything
            # NOTE(review): assumed to be a sibling of the check above (not
            # nested inside it) and to apply to labelled subtrees only --
            # confirm against the original layout.
            if len(subTree.leaves()) < 5:
                print("L: %s" % label)
                print(subTree.pprint().encode('utf-8'))
                print("-" * 8)

    for rule, ruleProds in prodsByLabel.items():
        print("RULE: %s" % rule)
        for production in ruleProds:
            print(production)
        print("=" * 8)
def getMappingFromNodeIDToSentiment(tigerSentence, pennSentence):
    """Map TigerXML node IDs to Penn-tree sentiment values.

    The TigerXML parse is converted with tigerToTree and walked in lockstep
    with the sentiment-annotated Penn Treebank parse.  At every step the
    label of the Tiger node (its node ID) becomes a key and the label of
    the Penn node (its sentiment value) becomes the associated value.

    Both trees must be isomorphic.  If the two walks differ in length, the
    shorter one is padded with a plain string, which has no ``node``
    attribute, so the mismatch surfaces as an AttributeError.

    @param tigerSentence TigerGraph object containing a sentence parse tree
    @param pennSentence NLTK Tree containing a sentence parse tree
    @returns Mapping from node IDs to sentiment values
    """
    tigerWalk = ma_util.walkTree(tigerToTree(tigerSentence))
    pennWalk = ma_util.walkTree(pennSentence)
    pairedNodes = itertools.izip_longest(
        tigerWalk, pennWalk, fillvalue="LIST_LENGTH_NOT_EQUAL")

    mapping = {}
    for pair in pairedNodes:
        # Read the Tiger side first, then the Penn side.
        nodeID = pair[0].node
        sentiment = pair[1].node
        mapping[nodeID] = sentiment
    return mapping
def walkTree(tree):
    """Delegate to ma_util.walkTree and return its result unchanged.

    @param tree Passed straight through to ma_util.walkTree
    @returns Whatever ma_util.walkTree(tree) returns
    """
    walked = ma_util.walkTree(tree)
    return walked