def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName, outDir):
        """Write the back-off family of symbol distributions into ``outDir``.

        Starting from the joint table ``jointProbC1C2C3C4`` and the stored
        ``<symbolName>GivenC1C2C3C4`` DPMF, the conditioning context is
        shortened one concept at a time and every resulting table is saved:
        given C1C2C3C4, C1C2C3, C1C2 and C1, followed by the unigram and the
        zerogram table.

        Args:
            conceptMap: Handed to ``LexMap().read`` to load the concept lexicon.
            symbolMap: Handed to ``LexMap().read`` to load the symbol lexicon.
            symbolDt: Handed to ``gmtk.readDt``; the result is combined with
                both the joint DCPT and the symbol DPMF.
            dcpts: Handed to ``gmtk.readDcpt`` to fetch ``jointProbC1C2C3C4``.
            dpmfs: Handed to ``gmtk.readDpmf`` to fetch
                ``<symbolName>GivenC1C2C3C4``.
            symbolName: Prefix substituted into every table name.
            outDir: First argument of every ``gmtk.save*`` call.

        Returns:
            None.
        """
        concept_lex = LexMap().read(conceptMap)
        symbol_lex = LexMap().read(symbolMap)

        sink_symbol = int(symbol_lex["_sink_"])
        sink_concept = int(concept_lex["_SINK_"])

        n_concepts = len(concept_lex)
        n_symbols = len(symbol_lex)

        def _save_dpmf(table_name, context_axes, table):
            # The count passed to saveDpmfsProbs is the number of entries that
            # vectSubList reports for the conditioning axes.
            n_contexts = len(table.vectSubList(context_axes))
            gmtk.saveDpmfsProbs(outDir, table_name, n_contexts, n_symbols, table)

        # The same decision tree is paired with the joint DCPT and, below,
        # with the symbol DPMF.
        decision_tree = gmtk.readDt(symbolDt)
        joint_dcpt = gmtk.readDcpt(dcpts, "jointProbC1C2C3C4")
        concept_joint = gmtk.combineDtDcpt1(decision_tree, joint_dcpt)

        symbol_dpmf = gmtk.readDpmf(dpmfs, "%sGivenC1C2C3C4" % symbolName)
        given_c1234 = gmtk.combineDtDcpt2(decision_tree, symbol_dpmf)

        # Presumably the joint over (symbol, C1..C4); every shorter-context
        # table below is derived from it.
        joint_c1234 = given_c1234.multiple([1, 2, 3, 4], concept_joint)
        _save_dpmf("%sGivenC1C2C3C4" % symbolName, [1, 2, 3, 4], given_c1234)

        joint_c123 = joint_c1234.marginalize([0, 1, 2, 3])
        given_c123 = joint_c123.conditionalize([1, 2, 3])
        _save_dpmf("%sGivenC1C2C3" % symbolName, [1, 2, 3], given_c123)

        joint_c12 = joint_c123.marginalize([0, 1, 2])
        given_c12 = joint_c12.conditionalize([1, 2])
        _save_dpmf("%sGivenC1C2" % symbolName, [1, 2], given_c12)

        joint_c1 = joint_c12.marginalize([0, 1])
        given_c1 = joint_c1.conditionalize([1])

        # When the conditioning concept is _SINK_, the _sink_ word must be
        # allowed to be generated; otherwise the _SINK_ concept would show up
        # in the stack.
        # TODO: Turn it into validator
        given_c1.setValue([sink_symbol, sink_concept], 1)

        gmtk.saveDcptBigram(outDir, "%sGivenC1" % symbolName, n_symbols, n_concepts, given_c1)

        unigram = joint_c1.marginalize([0])

        # Only the _unseen_ word should be decodable here, so _empty_ gets
        # probability zero and the table is renormalised to sum to one.
        empty_symbol = int(symbol_lex["_empty_"])
        unigram.setValue([empty_symbol], 0)
        unigram = unigram.normJoint()

        gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, n_symbols, unigram)

        gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, n_symbols, symbol_lex)
Example #2
0
# Script fragment: load both lexicon maps and the joint concept table, then
# obtain concept1GivenC2C3C4 from that joint table by conditioning.
# NOTE(review): verbose, conceptFileName, wordFileName, wordDtFileName and
# dcptsFileName are not defined in this fragment -- presumably set earlier in
# the script; confirm.
if verbose:
    print("Start")
    print("-------------------------------------------------")

    print("Smoothing concept1GivenC2C3C4.")

# Read the concept and word lexicons and record how many entries each holds.
conceptMap = LexMap().read(conceptFileName)
wordMap = LexMap().read(wordFileName)
conceptCard = len(conceptMap)
wordCard = len(wordMap)

# Load the decision tree and the "jointProbC1C2C3C4" DCPT that goes with it.
jointDtC1C2C3C4 = gmtk.readDt(wordDtFileName)
jointDcptC1C2C3C4 = gmtk.readDcpt(dcptsFileName, "jointProbC1C2C3C4")
# Sanity check on the loaded table (per the message: probabilities sum to 1).
# NOTE(review): assert statements are skipped under `python -O`, so this check
# disappears in optimized runs -- confirm that is acceptable.
assert toolkit.testProb(jointDcptC1C2C3C4), "Sum of probabilities should be always 1."

jointProbC1C2C3C4 = gmtk.combineDtDcpt1(jointDtC1C2C3C4, jointDcptC1C2C3C4)

# Disabled step: forbid generating _EMPTY_ given _EMPTY_, _EMPTY_, _EMPTY_.
# jointProbC1C2C3C4.setValue([int(conceptMap["_EMPTY_"]),
#                             int(conceptMap["_EMPTY_"]),
#                             int(conceptMap["_EMPTY_"]),
#                             int(conceptMap["_EMPTY_"])], 0)

# Disabled alternative: read concept1GivenC2C3C4 from its own decision tree and
# DPMF instead of deriving it from the joint table.
##concept1DtC2C3C4 = gmtk.readDt(concept1DtFileName)
##concept1DpmfC2C3C4 = gmtk.readDpmf(dpmfsFileName, "concept1GivenC2C3C4")
##assert toolkit.testProb2(concept1DpmfC2C3C4), "Sum of probabilities should be always 1."
##
##concept1GivenC2C3C4 = gmtk.combineDtDcpt2(concept1DtC2C3C4, concept1DpmfC2C3C4)
# Active path: condition the joint table on axes 1, 2 and 3.
concept1GivenC2C3C4 = jointProbC1C2C3C4.conditionalize([1, 2, 3])

##if verbose:
    def main(self, conceptMap, symbolMap, symbolDt, dcpts, dpmfs, symbolName,
             outDir):
        """Write the back-off family of symbol distributions into ``outDir``.

        Starting from the joint table ``jointProbC1C2C3C4`` and the stored
        ``<symbolName>GivenC1C2C3C4`` DPMF, the conditioning context is
        shortened one concept at a time and every resulting table is saved:
        given C1C2C3C4, C1C2C3, C1C2 and C1, followed by the unigram and the
        zerogram table.

        Args:
            conceptMap: Handed to ``LexMap().read`` to load the concept lexicon.
            symbolMap: Handed to ``LexMap().read`` to load the symbol lexicon.
            symbolDt: Handed to ``gmtk.readDt``; the result is combined with
                both the joint DCPT and the symbol DPMF.
            dcpts: Handed to ``gmtk.readDcpt`` to fetch ``jointProbC1C2C3C4``.
            dpmfs: Handed to ``gmtk.readDpmf`` to fetch
                ``<symbolName>GivenC1C2C3C4``.
            symbolName: Prefix substituted into every table name.
            outDir: First argument of every ``gmtk.save*`` call.

        Returns:
            None.
        """
        concept_lex = LexMap().read(conceptMap)
        symbol_lex = LexMap().read(symbolMap)

        sink_symbol = int(symbol_lex["_sink_"])
        sink_concept = int(concept_lex["_SINK_"])

        n_concepts = len(concept_lex)
        n_symbols = len(symbol_lex)

        def _save_dpmf(table_name, context_axes, table):
            # The count passed to saveDpmfsProbs is the number of entries that
            # vectSubList reports for the conditioning axes.
            n_contexts = len(table.vectSubList(context_axes))
            gmtk.saveDpmfsProbs(outDir, table_name, n_contexts, n_symbols,
                                table)

        # The same decision tree is paired with the joint DCPT and, below,
        # with the symbol DPMF.
        decision_tree = gmtk.readDt(symbolDt)
        joint_dcpt = gmtk.readDcpt(dcpts, "jointProbC1C2C3C4")
        concept_joint = gmtk.combineDtDcpt1(decision_tree, joint_dcpt)

        symbol_dpmf = gmtk.readDpmf(dpmfs, "%sGivenC1C2C3C4" % symbolName)
        given_c1234 = gmtk.combineDtDcpt2(decision_tree, symbol_dpmf)

        # Presumably the joint over (symbol, C1..C4); every shorter-context
        # table below is derived from it.
        joint_c1234 = given_c1234.multiple([1, 2, 3, 4], concept_joint)
        _save_dpmf("%sGivenC1C2C3C4" % symbolName, [1, 2, 3, 4], given_c1234)

        joint_c123 = joint_c1234.marginalize([0, 1, 2, 3])
        given_c123 = joint_c123.conditionalize([1, 2, 3])
        _save_dpmf("%sGivenC1C2C3" % symbolName, [1, 2, 3], given_c123)

        joint_c12 = joint_c123.marginalize([0, 1, 2])
        given_c12 = joint_c12.conditionalize([1, 2])
        _save_dpmf("%sGivenC1C2" % symbolName, [1, 2], given_c12)

        joint_c1 = joint_c12.marginalize([0, 1])
        given_c1 = joint_c1.conditionalize([1])

        # When the conditioning concept is _SINK_, the _sink_ word must be
        # allowed to be generated; otherwise the _SINK_ concept would show up
        # in the stack.
        # TODO: Turn it into validator
        given_c1.setValue([sink_symbol, sink_concept], 1)

        gmtk.saveDcptBigram(outDir, "%sGivenC1" % symbolName, n_symbols,
                            n_concepts, given_c1)

        unigram = joint_c1.marginalize([0])

        # Only the _unseen_ word should be decodable here, so _empty_ gets
        # probability zero and the table is renormalised to sum to one.
        empty_symbol = int(symbol_lex["_empty_"])
        unigram.setValue([empty_symbol], 0)
        unigram = unigram.normJoint()

        gmtk.saveDcptUnigram(outDir, "%sUnigram" % symbolName, n_symbols,
                             unigram)

        gmtk.saveDcptUnseen(outDir, "%sZerogram" % symbolName, n_symbols,
                            symbol_lex)