def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName, outDir):
    """Write the symbol tables conditioned on 4, 3, 2, 1 and 0 concepts.

    The two lexicons are loaded with ``LexMap().read``; the decision tree,
    the ``jointProbC1C2C3C4`` table and the ``<symbolName>GivenC1C2C3C4``
    table are loaded through ``gmtk``.  Every lower-order table is obtained
    from the joint table one order above it by ``marginalize`` followed by
    ``conditionalize``, and each table is saved into ``outDir``.

    conceptMap -- argument handed to ``LexMap().read`` for the concept lexicon
    symbolMap  -- argument handed to ``LexMap().read`` for the symbol lexicon
    symbolDt   -- argument handed to ``gmtk.readDt``
    dcpts      -- argument handed to ``gmtk.readDcpt``
    dpmfs      -- argument handed to ``gmtk.readDpmf``
    symbolName -- prefix of every table name that is read or written
    outDir     -- first argument of every ``gmtk.save*`` call
    """
    concepts = LexMap().read(conceptMap)
    symbols = LexMap().read(symbolMap)

    sinkSymbol = int(symbols["_sink_"])
    sinkConcept = int(concepts["_SINK_"])

    nConcepts = len(concepts)
    nSymbols = len(symbols)

    # Joint table over the four concept slots.
    stackDt = gmtk.readDt(symbolDt)
    stackDcpt = gmtk.readDcpt(dcpts, "jointProbC1C2C3C4")
    stackJoint = gmtk.combineDtDcpt1(stackDt, stackDcpt)

    # Symbol given four concepts: read as-is and saved unchanged.
    name4 = "%sGivenC1C2C3C4" % symbolName
    given4 = gmtk.combineDtDcpt2(stackDt, gmtk.readDpmf(dpmfs, name4))
    joint4 = given4.multiple([1, 2, 3, 4], stackJoint)
    parents4 = given4.vectSubList([1, 2, 3, 4])
    gmtk.saveDpmfsProbs(outDir, name4, len(parents4), nSymbols, given4)

    # Symbol given three concepts.
    joint3 = joint4.marginalize([0, 1, 2, 3])
    given3 = joint3.conditionalize([1, 2, 3])
    parents3 = given3.vectSubList([1, 2, 3])
    gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2C3" % symbolName, len(parents3), nSymbols, given3)

    # Symbol given two concepts.
    joint2 = joint3.marginalize([0, 1, 2])
    given2 = joint2.conditionalize([1, 2])
    parents2 = given2.vectSubList([1, 2])
    gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2" % symbolName, len(parents2), nSymbols, given2)

    # Symbol given one concept.
    joint1 = joint2.marginalize([0, 1])
    given1 = joint1.conditionalize([1])

    # When the conditioning concept is _SINK_, the _sink_ symbol must be the
    # one that can be generated; otherwise the _SINK_ concept would show up
    # in the stack.
    # TODO: turn this into a validator.
    given1.setValue([sinkSymbol, sinkConcept], 1)

    gmtk.saveDcptBigram(outDir, "%sGivenC1" % symbolName, nSymbols, nConcepts, given1)

    unigram = joint1.marginalize([0])

    # Only the _unseen_ word should be decodable here, so the entry for
    # _empty_ is set to zero and the table is re-normalised afterwards.
    unigram.setValue([int(symbols["_empty_"])], 0)
    unigram = unigram.normJoint()

    gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, nSymbols, unigram)

    gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, nSymbols, symbols)
Exemple #2
0
#                             int(conceptMap["_EMPTY_"]), 
#                             int(conceptMap["_EMPTY_"]), 
#                             int(conceptMap["_EMPTY_"])], 0)

# Disabled alternative: load concept1GivenC2C3C4 from its own decision tree and
# DPMF file instead of deriving it from the joint table (kept for reference).
##concept1DtC2C3C4 = gmtk.readDt(concept1DtFileName)
##concept1DpmfC2C3C4 = gmtk.readDpmf(dpmfsFileName, "concept1GivenC2C3C4")
##assert toolkit.testProb2(concept1DpmfC2C3C4), "Sum of probabilities should be always 1."
##
##concept1GivenC2C3C4 = gmtk.combineDtDcpt2(concept1DtC2C3C4, concept1DpmfC2C3C4)
# Active path: derive the table directly from the joint table
# jointProbC1C2C3C4 (bound earlier in the script, outside this fragment).
concept1GivenC2C3C4 = jointProbC1C2C3C4.conditionalize([1, 2, 3])

# Disabled debug output comparing the derived table with the loaded one.
##if verbose:
##    print "Orig : Probs %4d : Parents %4d" % (len(concept1GivenC2C3C4.vectList()), len(concept1GivenC2C3C4.vectSubList([1, 2, 3])))
##    print "Comp : Probs %4d : Parents %4d" % (len(concept1GivenC2C3C4X.vectList()), len(concept1GivenC2C3C4X.vectSubList([1, 2, 3])))

# The third argument is the number of entries returned by vectSubList for the
# conditioning positions [1, 2, 3]; the fourth is the concept cardinality.
gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3C4", len(concept1GivenC2C3C4.vectSubList([1, 2, 3])), conceptCard, concept1GivenC2C3C4)
##gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3C4X", len(concept1GivenC2C3C4X.vectSubList([1, 2, 3])), conceptCard, concept1GivenC2C3C4X)

##############################################################################
# Save trigrams: reduce the joint table to positions [0, 1, 2], then condition
# on positions [1, 2].
jointProbC1C2C3 = jointProbC1C2C3C4.marginalize([0, 1, 2])
concept1GivenC2C3 = jointProbC1C2C3.conditionalize([1, 2])

gmtk.saveDpmfsProbs(dirOut, "concept1GivenC2C3", len(concept1GivenC2C3.vectSubList([1, 2])), conceptCard, concept1GivenC2C3)

##############################################################################
# Save bigrams as CPT: reduce to positions [0, 1], then condition on [1].
# (The save call itself is not part of this fragment.)
jointProbC1C2 = jointProbC1C2C3.marginalize([0, 1])
concept1GivenC2 = jointProbC1C2.conditionalize([1])

# When conditioning on _SINK_, only the _SINK_ concept may be generated.
                if table.getSafeValue(index) >= EPSILON:
                    new_value = table.getSafeValue(index)/sum
                    table.setValue(index, new_value)

        return table

    def storeResults(self, outDir, symName, (dt4, dt3, dt2), word_C, constFile=None, constAppend=True):
        wordCard = len(self.symMap)
        # Save 5-grams
        NAME = '%sGivenC1C2C3C4' % symName
        numberOfSpmfs = gmtk.saveDt(outDir, NAME, dt4, 4)
        gmtk.saveCollection(outDir, NAME, numberOfSpmfs)
        gmtk.saveSpmfs(outDir, NAME, numberOfSpmfs, wordCard)

        number = len(word_C.vectSubList([1, 2, 3, 4]))
        gmtk.saveDpmfsProbs(outDir, NAME, number, wordCard, word_C)

        if constFile:
            if constAppend:
                const_fw = file(constFile, 'a')
            else:
                const_fw = file(writeConst, 'w')
            try:
                const_fw.write("\n% the cardinality should be CONCEPT_CARD^DEPTH_OF_STACK, but I know that the stack values are sparse\n")
                const_fw.write("#define JOINT_C1C2C3C4_CARD     %d\n" % numberOfSpmfs)
            finally:
                const_fw.close()

        # save 4-grams
        NAME = '%sGivenC1C2C3' % symName
        numberOfSpmfs = gmtk.saveDt(outDir, NAME, dt3, 3)
    def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName,
             outDir):
        """Write the symbol tables conditioned on 4, 3, 2, 1 and 0 concepts.

        The two lexicons are loaded with ``LexMap().read``; the decision
        tree, the ``jointProbC1C2C3C4`` table and the
        ``<symbolName>GivenC1C2C3C4`` table are loaded through ``gmtk``.
        Every lower-order table is obtained from the joint table one order
        above it by ``marginalize`` followed by ``conditionalize``, and each
        table is saved into ``outDir``.

        conceptMap -- argument handed to ``LexMap().read`` (concept lexicon)
        symbolMap  -- argument handed to ``LexMap().read`` (symbol lexicon)
        symbolDt   -- argument handed to ``gmtk.readDt``
        dcpts      -- argument handed to ``gmtk.readDcpt``
        dpmfs      -- argument handed to ``gmtk.readDpmf``
        symbolName -- prefix of every table name that is read or written
        outDir     -- first argument of every ``gmtk.save*`` call
        """
        concepts = LexMap().read(conceptMap)
        symbols = LexMap().read(symbolMap)

        sinkSymbol = int(symbols['_sink_'])
        sinkConcept = int(concepts['_SINK_'])

        nConcepts = len(concepts)
        nSymbols = len(symbols)

        # Joint table over the four concept slots.
        stackDt = gmtk.readDt(symbolDt)
        stackDcpt = gmtk.readDcpt(dcpts, "jointProbC1C2C3C4")
        stackJoint = gmtk.combineDtDcpt1(stackDt, stackDcpt)

        # Symbol given four concepts: read as-is and saved unchanged.
        name4 = "%sGivenC1C2C3C4" % symbolName
        given4 = gmtk.combineDtDcpt2(stackDt, gmtk.readDpmf(dpmfs, name4))
        joint4 = given4.multiple([1, 2, 3, 4], stackJoint)
        parents4 = given4.vectSubList([1, 2, 3, 4])
        gmtk.saveDpmfsProbs(outDir, name4, len(parents4), nSymbols, given4)

        # Symbol given three concepts.
        joint3 = joint4.marginalize([0, 1, 2, 3])
        given3 = joint3.conditionalize([1, 2, 3])
        parents3 = given3.vectSubList([1, 2, 3])
        gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2C3" % symbolName,
                            len(parents3), nSymbols, given3)

        # Symbol given two concepts.
        joint2 = joint3.marginalize([0, 1, 2])
        given2 = joint2.conditionalize([1, 2])
        parents2 = given2.vectSubList([1, 2])
        gmtk.saveDpmfsProbs(outDir, "%sGivenC1C2" % symbolName,
                            len(parents2), nSymbols, given2)

        # Symbol given one concept.
        joint1 = joint2.marginalize([0, 1])
        given1 = joint1.conditionalize([1])

        # When the conditioning concept is _SINK_, the _sink_ symbol must be
        # the one that can be generated; otherwise the _SINK_ concept would
        # show up in the stack.
        # TODO: turn this into a validator.
        given1.setValue([sinkSymbol, sinkConcept], 1)

        gmtk.saveDcptBigram(outDir, "%sGivenC1" % symbolName, nSymbols,
                            nConcepts, given1)

        unigram = joint1.marginalize([0])

        # Only the _unseen_ word should be decodable here, so the entry for
        # _empty_ is set to zero and the table is re-normalised afterwards.
        unigram.setValue([int(symbols["_empty_"])], 0)
        unigram = unigram.normJoint()

        gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, nSymbols,
                             unigram)

        gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, nSymbols,
                            symbols)
Exemple #5
0
# Build the push table from the shared decision tree and the push DPMF
# (jointDtC1C2C3C4 and pushDpmfC1C2C3C4 are bound earlier in the script,
# outside this fragment).
pushGivenC1C2C3C4 = gmtk.combineDtDcpt2(jointDtC1C2C3C4, pushDpmfC1C2C3C4)

#
# jointProbPC1C2C3C4 = pushGivenC1C2C3C4 * jointProbC1C2C3C4
#
# Note: the joint table is computed here, BEFORE insertPenalty rebinds
# pushGivenC1C2C3C4 below, so it is built from the table without the penalty.

jointProbPC1C2C3C4 = pushGivenC1C2C3C4.multiple([1, 2, 3, 4],
                                                jointProbC1C2C3C4)

##############################################################################
# save pentagrams
# insertPenalty is called with (0, penalty, pushCard) and the negated penalty
# is passed as the last argument of saveDpmfsProbs.
# NOTE(review): the exact meaning of both is defined in gmtk / the table class
# -- confirm there.
pushGivenC1C2C3C4 = pushGivenC1C2C3C4.insertPenalty(0, penalty, pushCard)

gmtk.saveDpmfsProbs(dirOut, "pushGivenC1C2C3C4",
                    len(pushGivenC1C2C3C4.vectSubList([1, 2, 3, 4])), pushCard,
                    pushGivenC1C2C3C4, -penalty)

##############################################################################
# save quatrograms
# Reduce the joint table to positions [0, 1, 2, 3], condition on [1, 2, 3],
# then apply the same penalty step as above before saving.
jointProbPC1C2C3 = jointProbPC1C2C3C4.marginalize([0, 1, 2, 3])
pushGivenC1C2C3 = jointProbPC1C2C3.conditionalize([1, 2, 3])
pushGivenC1C2C3 = pushGivenC1C2C3.insertPenalty(0, penalty, pushCard)

gmtk.saveDpmfsProbs(dirOut, "pushGivenC1C2C3",
                    len(pushGivenC1C2C3.vectSubList([1, 2, 3])), pushCard,
                    pushGivenC1C2C3, -penalty)

##############################################################################
# save trigrams
# Only the marginalisation to positions [0, 1, 2] is part of this fragment.
jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2])
# Load the pop DPMF and check it with toolkit.testProb2 before using it
# (the assertion message states that the probabilities must sum to 1).
popDpmfC1C2C3C4 = gmtk.readDpmf(dpmfsFileName, "popGivenC1C2C3C4")
assert toolkit.testProb2(popDpmfC1C2C3C4), "Sum of probabilities should be always 1."

# Attach the DPMF to the shared decision tree (jointDtC1C2C3C4 is bound
# earlier in the script, outside this fragment).
popGivenC1C2C3C4 = gmtk.combineDtDcpt2(jointDtC1C2C3C4, popDpmfC1C2C3C4)

#
# jointProbPC1C2C3C4 = popGivenC1C2C3C4 * jointProbC1C2C3C4
#
# Note: the joint table is computed here, BEFORE insertPenalty rebinds
# popGivenC1C2C3C4 below, so it is built from the table without the penalty.

jointProbPC1C2C3C4 = popGivenC1C2C3C4.multiple([1, 2, 3, 4], jointProbC1C2C3C4)

##############################################################################
# save pentagrams
# insertPenalty is called with (0, penalty, popCard) and the negated penalty
# is passed as the last argument of saveDpmfsProbs.
# NOTE(review): the exact meaning of both is defined in gmtk / the table class
# -- confirm there.
popGivenC1C2C3C4 = popGivenC1C2C3C4.insertPenalty(0, penalty, popCard)

gmtk.saveDpmfsProbs(dirOut, "popGivenC1C2C3C4", len(popGivenC1C2C3C4.vectSubList([1, 2, 3, 4])), popCard, popGivenC1C2C3C4, -penalty)

##############################################################################
# save quatrograms
# Reduce the joint table to positions [0, 1, 2, 3], condition on [1, 2, 3],
# then apply the same penalty step as above before saving.
jointProbPC1C2C3 = jointProbPC1C2C3C4.marginalize([0, 1, 2, 3])
popGivenC1C2C3 = jointProbPC1C2C3.conditionalize([1, 2, 3])
popGivenC1C2C3 = popGivenC1C2C3.insertPenalty(0, penalty, popCard)

gmtk.saveDpmfsProbs(dirOut, "popGivenC1C2C3", len(popGivenC1C2C3.vectSubList([1, 2, 3])), popCard, popGivenC1C2C3, -penalty)

##############################################################################
# save trigrams
# Reduce to positions [0, 1, 2], condition on [1, 2] and insert the penalty;
# the save call is not part of this fragment.
jointProbPC1C2 = jointProbPC1C2C3.marginalize([0, 1, 2])
popGivenC1C2 = jointProbPC1C2.conditionalize([1, 2])
popGivenC1C2 = popGivenC1C2.insertPenalty(0, penalty, popCard)
    def storeResults(self,
                     outDir,
                     symName,
                     (dt4, dt3, dt2),
                     word_C,
                     constFile=None,
                     constAppend=True):
        wordCard = len(self.symMap)
        # Save 5-grams
        NAME = '%sGivenC1C2C3C4' % symName
        numberOfSpmfs = gmtk.saveDt(outDir, NAME, dt4, 4)
        gmtk.saveCollection(outDir, NAME, numberOfSpmfs)
        gmtk.saveSpmfs(outDir, NAME, numberOfSpmfs, wordCard)

        number = len(word_C.vectSubList([1, 2, 3, 4]))
        gmtk.saveDpmfsProbs(outDir, NAME, number, wordCard, word_C)

        if constFile:
            if constAppend:
                const_fw = file(constFile, 'a')
            else:
                const_fw = file(writeConst, 'w')
            try:
                const_fw.write(
                    "\n% the cardinality should be CONCEPT_CARD^DEPTH_OF_STACK, but I know that the stack values are sparse\n"
                )
                const_fw.write("#define JOINT_C1C2C3C4_CARD     %d\n" %
                               numberOfSpmfs)
            finally:
                const_fw.close()