def mapRootSentiment(source, tigerHelper, force=False):
    """Maps top-level sentiment between source and target sentences.

    If the target root node is not aligned and only has the default
    sentiment, we apply the sentiment value of the source root node to the
    target root node. We assume that root nodes always are implicitly
    aligned.

    The optional force parameter specifies whether the root sentiment is
    always mapped. This will override any previous mapping based on node
    alignments.

    @param source {Iterable{nltk.trees.Tree}} Source PTB trees
    @param tigerHelper {th.TigerHelper} Helper wrapping the target TigerXML
        tree; its tree is modified in place
    @param force {boolean} If True, always map sentiment between root nodes
    @returns None; the target TigerXML tree inside tigerHelper is modified
        in place
    """
    # izip_longest + sentinel fillvalue makes a length mismatch between the
    # two corpora visible (the sentinel string has no .node attribute and
    # would fail loudly) instead of being silently truncated like izip.
    for (sourceSentence, targetSentence) in itertools.izip_longest(
            source, th.getSentences(tigerHelper.tree),
            fillvalue="LIST_LENGTH_NOT_EQUAL"):
        rootNode = tigerHelper.getSentenceRoot(targetSentence)
        metaS = rootNode.get("x-sentiment")
        # we will typically get here before default sentiment values
        # have been applied, so metaS might be None.
        #assert metaS is not None
        if not force and metaS == th.SEN_MAPPED:
            # Root already received a sentiment via node alignment; keep it.
            continue
        logger.debug("Mapping root sentiment %s for target %s",
                     sourceSentence.node, th.getNodeID(rootNode))
        th.setSentiment(rootNode, sourceSentence.node, th.SEN_MAPPED_ROOT)
def extract_features(self, trees, projected, extractLabels):
    """
    Turns a set of parse trees into feature vectors.

    @param trees {Iterable<nltk.tree.Tree>} Source data
    @param projected {basestring} Filename of projected trees (TigerXML)
    @param extractLabels {bool} Whether to return node labels or not
    @returns {tuple} Tuple of lists for features and labels
    """
    # Projected (cross-lingually transferred) sentences are only loaded when
    # the PROJ feature is enabled; otherwise the list stays empty.
    projectedSentences = []
    if PROJ in self.features:
        projectedTiger = th.TigerHelper(projected)
        projectedSentences = th.getSentences(projectedTiger.tree)
    nodeLabels = []       # gold sentiment label per node (only if extractLabels)
    data = []             # one feature vector per tree node, across all sentences
    rootIndices = []      # index into `data` where each sentence's nodes begin
    # NOTE(review): if PROJ is not in self.features, projectedSentences is
    # empty, so for any non-empty `trees` the first iteration hits the
    # fillvalue sentinel and raises "projectedSentences too short!" —
    # presumably PROJ is always enabled in practice; confirm with callers.
    for (projectedSentence, treeSentence) in itertools.izip_longest(projectedSentences, trees, fillvalue= "LISTLEN_NEQ"):
        # The sentinel fillvalue turns a length mismatch between the two
        # corpora into a hard error instead of silent truncation.
        if projectedSentence == "LISTLEN_NEQ":
            raise ValueError("projectedSentences too short!")
        if treeSentence == "LISTLEN_NEQ":
            raise ValueError("trees too short!")
        # Record where this sentence starts in the flat `data` list *before*
        # its vectors are appended, so callers can split per sentence.
        rootIndices.append(len(data))
        # Variables are only set if features are to be extracted
        projS = None
        labelsS = None
        sentiWSPosS = None
        sentiWSNegS = None
        pos = None
        # Phrase-predictor features are always extracted.
        (phraseS, regScores, counts) = self.ppE.extract_phrase_predictor_sentiment(
            treeSentence)
        if PROJ in self.features:
            projS = self.projE.extract_projection_sentiment(
                projectedTiger, projectedSentence)
        # Does the tree contain gold or is it just any old tree?
        # To clarify: a tree is not necessarily annotated with a sentiment
        # label. It could also be a vanilla parse tree.
        if extractLabels:
            labelsS = self.goldE.extract_gold_sentiment(treeSentence)
            # One gold label per extracted phrase node, or something is wrong.
            assert len(labelsS) == len(phraseS)
        if SENTIWS in self.features:
            # SentiWS lexicon scores, split into positive and negative parts.
            (sentiWSPosS, sentiWSNegS) = self.sentiWSE.extract_sentiWS(treeSentence)
        if POSF in self.features:
            pos = self.posE.extract_POS(treeSentence)
        # Combine the per-feature extractions into one vector per node;
        # disabled features are passed as None.
        vectors = self.build_item_vectors(phraseS, projS, regScores, counts,
                                          sentiWSPosS, sentiWSNegS, pos)
        data.extend(vectors)
        if extractLabels:
            nodeLabels.extend(labelsS)
    # nodeLabels stays empty when extractLabels is False.
    return (numpy.asarray(data), numpy.asarray(nodeLabels), rootIndices)
def read_tiger_items(fileName):
    """Reads a TigerXML file and returns its sentences serialized as strings.

    @param fileName {basestring} Path to a TigerXML file
    @returns {list} One serialized XML string per sentence
    """
    tiger = th.TigerHelper(fileName)
    # Serialize each sentence node: numpy.array_split would otherwise treat
    # the individual sentence nodes as iterables and return weird splits.
    return [etree.tostring(sentence)
            for sentence in th.getSentences(tiger.tree)]
def evaluate(predictFile, goldFile, showPercentages, dumpFile, csvFile, runName):
    """Compares predicted sentiment (TigerXML) against gold trees (PTB).

    Prints statistics for all nodes, for mapped (non-default) nodes only,
    and for root nodes, then writes the aggregated stats to a CSV file.

    @param predictFile {basestring} Filename of predicted TigerXML trees
    @param goldFile {basestring} Filename of gold PTB trees
    @param showPercentages {bool} Passed through to printStats
    @param dumpFile {basestring|None} If given, mispredicted nodes are
        dumped to this file
    @param csvFile {basestring} Output CSV for aggregated statistics
    @param runName {basestring} Run identifier inserted into the stats rows
    """
    # NOTE(review): the parameter is rebound from filename to file object;
    # the file is not closed if an exception occurs before the close below.
    if dumpFile is not None:
        dumpFile = open(dumpFile, "w")
    rootPredictedLabels = []     # predicted sentiment of each sentence root
    rootGoldLabels = []          # gold sentiment of each sentence root
    predictedLabels = []         # predicted sentiment for every node
    goldLabels = []              # gold sentiment for every node
    mappedPredictedLabels = []   # predictions for non-default (mapped) nodes only
    mappedGoldLabels = []        # gold labels for those same mapped nodes
    predictTiger = th.TigerHelper(predictFile)
    predictSentences = th.getSentences(predictTiger.tree)
    gold = shared.ma_util.readPenn(goldFile)
    # Sentinel fillvalue surfaces corpus-length mismatches instead of
    # silently truncating like izip would.
    for (predictSentence, goldSentence) in itertools.izip_longest(predictSentences, gold, fillvalue= "LIST_LENGTH_NOT_EQUAL"):
        rootPredictedLabels.append(predictTiger.getSentenceSentiment(
            predictSentence, forceSentiment=True))
        rootGoldLabels.append(goldSentence.node)
        # print "#" * 16
        # Walk both trees in parallel pre-order; nodes are assumed to line
        # up one-to-one between the predicted and gold trees.
        for (predictNode, goldNode) in itertools.izip_longest(
                predictTiger.preOrder(predictSentence, forceSentiment=True),
                shared.ma_util.walkTree(goldSentence),
                fillvalue="LIST_LENGTH_NOT_EQUAL"):
            # predictNode is a tuple: [1] = sentiment value,
            # [2] = how the sentiment was assigned (mapped vs. default).
            predictedSentiment = predictNode[1]
            #if (predictedSentiment is None):
            #print predictNode
            predictedLabels.append(predictedSentiment)
            # print "=" * 8
            # print goldNode
            # print "-" * 8
            # print predictNode
            # print "=" * 8
            goldSentiment = goldNode.node
            goldLabels.append(goldSentiment)
            # Track separately the nodes whose sentiment was actually
            # mapped (i.e. not left at the default value).
            if predictNode[2] != th.SEN_DEFAULT:
                mappedPredictedLabels.append(predictedSentiment)
                mappedGoldLabels.append(goldSentiment)
            # Dump mispredictions (compared on the string label scale).
            if dumpFile and (mapNumToS(predictedSentiment) != mapNumToS(goldSentiment)):
                dumpFile.write("=" * 8)
                dumpFile.write("\n")
                dumpFile.write("Prediction error.\n")
                dumpFile.write("gold: ")
                dumpFile.write(str(goldNode))
                dumpFile.write("\n")
                dumpFile.write("predicted:")
                dumpFile.write(str(predictNode[1]))
                dumpFile.write("\n")
                dumpFile.write("=" * 8)
                dumpFile.write("\n")
    if dumpFile is not None:
        dumpFile.close()
    print "All node labels"
    allNodes = printStats(goldLabels, predictedLabels, showPercentages)
    print ""
    print "Mapped node labels only (No default)"
    noDefault = printStats(mappedGoldLabels, mappedPredictedLabels,
                           showPercentages, prefix="noDefault")
    print "Skipped %s default labels" % (len(goldLabels) -
                                         len(mappedGoldLabels))
    print ""
    print "Root labels"
    rootLabels = printStats(rootGoldLabels, rootPredictedLabels,
                            showPercentages, prefix="root")
    # Tag the stat rows with the run name and evaluation type.
    allNodes = shared.evaluate.ins(['run', "type"], [runName, "all nodes"], allNodes)
    noDefault = shared.evaluate.ins(['run', 'type'], [runName, "no default"], noDefault)
    # NOTE(review): noDefault is tagged above but never merged into
    # allNodes, so it does not reach the CSV output — confirm whether the
    # "no default" stats are intentionally print-only.
    allNodes.update(rootLabels)
    shared.evaluate.statsToFile(allNodes, csvFile)