Python collapseToTmp Examples

Programming Language: Python

Namespace/Package Name: tigerhelper

Method/Function: collapseToTmp

Examples at hotexamples.com: 2

Python collapseToTmp - 2 examples found. These are the top-rated real-world Python examples of tigerhelper.collapseToTmp extracted from open source projects. You can rate examples to help us improve the quality of the examples.

Example #1

Show file

File: test_project.py Project: mhaas/ma-thesis

 def test2(self):
     # Checks the node-ID -> sentiment mapping computed on the
     # unary-collapsed version of tests/1.xml against a stored YAML fixture.
     collapsedFile = th.collapseToTmp("tests/1.xml")
     print("Unary-collapsed tree at %s" % collapsedFile)
     # Show the complete diff if the comparison below fails.
     self.maxDiff = None
     # Only the first sentence of each treebank is compared.
     penn = next(shared.ma_util.readPenn("tests/1-binarization-yv.ptb"))
     tiger = next(project.readTiger(collapsedFile))
     res = project.getMappingFromNodeIDToSentiment(tiger, penn)
     # Close the fixture file deterministically instead of leaking the
     # handle to the garbage collector.
     # NOTE(review): yaml.load can construct arbitrary objects; this is a
     # local test fixture, but consider yaml.safe_load if the fixture only
     # holds plain data.
     with open("tests/1.newcollapse.expected.yml") as fh:
         expected = yaml.load(fh)
     self.assertEqual(self._mapToString(res), self._mapToString(expected))

Example #2

Show file

File: project.py Project: mhaas/ma-thesis

def main(inputFile, annotations, alignment, targetFile, output,
         stripTargetIDPrefix, applyParentSentiment, projectRootSentiment,
         alignTypes):
    """
    Projects sentiment labels from a source tree to a target tree
    using an alignment between source and target nodes.

    The source treebank is unary-collapsed into a temporary file before
    the node-ID-to-sentiment mapping is extracted; the target tree is
    unary-collapsed after the mapping has been applied. Progress is
    reported through the module logger and node statistics are printed
    to stdout. The resulting target tree is written to ``output``.

    @param inputFile {basestring} Filename of source treebank in TigerXML
           format
    @param annotations {basestring} Filename of treebank with sentiment labels
           in Penn Treebank format
    @param alignment {basestring} Filename of mapping between source and
           target nodes in Stockholm Treealigner format
    @param targetFile {basestring} Filename of target treebank in TigerXML
           format
    @param output {basestring} Filename for resulting output file
    @param stripTargetIDPrefix {boolean} Whether to strip alphabetic prefixes
           from node IDs in target tree
    @param applyParentSentiment {boolean} Whether to infer sentiment labels
           for unaligned nodes from ancestor nodes
    @param projectRootSentiment {boolean} Whether to perform implicit alignment
           between source and target root nodes if unaligned
    @param alignTypes {list} Which link types to include: good, fuzzy or both
    """
    mapping = {}
    logger.info("Loading alignment.")
    alignment = readAlignment(alignment, alignTypes)
    logger.info("Done loading alignment.")
    logger.info("Alignment source was: %s", alignment["source"])
    logger.info("Alignment target was: %s", alignment["target"])
    # From here on, ``alignment`` is only the node mapping itself.
    alignment = alignment["alignment"]
    logger.info("Collapsing unary nodes for source file")
    # Now get some node statistic from source/input side
    # This means we have to load the file again in tigerHelper
    inputHelper = TigerHelper(inputFile)
    # This count describes the source treebank, so label it as such.
    print("Source has %s nodes (T, NT) before unary-collapsing nodes"
          % inputHelper.count)
    del inputHelper
    # Now overwrite inputFile variable!
    inputFile = th.collapseToTmp(inputFile, alignment.keys())
    logger.info("Wrote unary-collapsed source tigerXML to %s", inputFile)
    logger.info("Extracting mapping from source ID to sentiment value.")
    # izip_longest pads the shorter treebank with a marker string so that
    # a sentence-count mismatch is passed on to the mapping function
    # rather than silently truncated.
    for (tigerSentence, pennSentence) in itertools.izip_longest(
            readTiger(inputFile), ma_util.readPenn(annotations),
            fillvalue="LIST_LENGTH_NOT_EQUAL"):
        mapping.update(
            getMappingFromNodeIDToSentiment(tigerSentence, pennSentence))
    logger.info("Done extracting mapping.")
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(targetFile, "r") as fh:
        target = etree.parse(fh)
    tigerHelper = TigerHelper(target, stripTargetIDPrefix)
    print("Target has %s nodes (T, NT)" % tigerHelper.count)
    logger.info("Applying mapping to target.")
    applyMappingToTarget(
        mapping, alignment, tigerHelper, stripTargetIDPrefix)
    # The three counters below are module-level names; presumably they are
    # updated by applyMappingToTarget -- verify against its definition.
    print("Source nodes with sentiment, not in alignment: %s"
          % countSourceNotInAlignment)
    print("Nodes with sentiment and alignment, but not found "
          "in target tree: %s" % countTargetNotFound)
    print("Sentiment label projected using alignment for %s nodes"
          % countMappingApplied)
    logger.info("Done applying mapping.")
    logger.info("Unary-collapsing nodes in target tree.")
    tigerHelper.collapseUnary()
    logger.info("Done collapsing unary nodes.")
    print("After collapsing unary nodes, Target has %s nodes (T, NT)"
          % tigerHelper.count)
    logger.info("Fixing up remaining nodes")
    # Need to map root sentiment before looking up parent sentiment
    # so we can use the new information
    if projectRootSentiment:
        logger.info("Projecting root sentiment for unaligned root nodes.")
        mapRootSentiment(ma_util.readPenn(annotations), tigerHelper)
        logger.info("Done projecting root sentiment.")
    if applyParentSentiment:
        # The original concatenation lacked a space ("withunknown").
        logger.info("Using parent lookup for nodes with "
                    "unknown sentiment values.")
        (modTree, count) = tigerHelper.applyParentSentimentValue()
        print("Applied parent sentiment value for %s nodes" % count)
    else:
        logger.info("Using default for nodes with unknown sentiment values.")
        (modTree, count) = tigerHelper.applyDefaultSentimentValue()
        print("Applied default sentiment value for %s nodes" % count)
    logger.info("Done fixing up remaining nodes.")
    logger.info("Saving to disk...")
    tigerHelper.tree.write(output)
    logger.info("Done!")