def encodeDataToWordList(data, bytesForSize, markovChain, wordsPerState=1):
    """Encode ``data`` as words, preceded by the words encoding its length.

    ``len(data)`` is turned into a byte list with
    ``utils.convertNumberToByteList`` and encoded starting from the start
    state. The payload is then encoded starting from the state left by the
    last word(s) of the length code, so both parts form one continuous chain.

    Args:
        data: payload to encode; must support ``len()`` and be accepted by
            ``bigBitField.BigBitField``.
        bytesForSize: passed to ``utils.convertNumberToByteList`` as the size
            of the length prefix.
        markovChain: chain handed through to
            ``fixedSizeCode.encodeBitsToWordList``.
        wordsPerState: 1 if a state is a single word, 2 if it is a bigram.

    Returns:
        The length code concatenated with the payload code.

    Raises:
        ValueError: if ``wordsPerState`` is neither 1 nor 2.
    """
    # Fail early and explicitly: any other value used to leave the state
    # variable unassigned and crash later with an UnboundLocalError.
    if wordsPerState not in (1, 2):
        raise ValueError(
            "wordsPerState must be 1 or 2, got " + repr(wordsPerState))

    # encode the data length first
    lenData = utils.convertNumberToByteList(len(data), bytesForSize)
    bitsField = bigBitField.BigBitField(lenData)

    if wordsPerState == 1:
        startState = config.startSymbol
    else:
        startState = (config.startSymbol, config.startSymbol)

    lenDataCode = fixedSizeCode.encodeBitsToWordList(bitsField, markovChain,
                                                     startState, wordsPerState)

    # compute last word (or bigram)
    if len(lenDataCode) == 0:
        # No word was emitted for the length, so the chain has not moved
        # away from the start state.
        lastWord = startState
    elif wordsPerState == 1:
        lastWord = lenDataCode[-1]
    else:
        if len(lenDataCode) == 1:
            lastWord = (config.startSymbol, lenDataCode[-1])
        else:
            lastWord = (lenDataCode[-2], lenDataCode[-1])

        # A start symbol as the latest word resets the whole bigram state.
        if lastWord[1] == config.startSymbol:
            lastWord = (config.startSymbol, config.startSymbol)

    # encode the actual message
    bitsField = bigBitField.BigBitField(data)
    mainDataCode = fixedSizeCode.encodeBitsToWordList(bitsField, markovChain,
                                                      lastWord, wordsPerState)

    return lenDataCode + mainDataCode
def testExample(data, markovChain):
    """Print ``data`` and the word list it encodes to, for manual checking.

    The data is wrapped in a ``BigBitField`` (second constructor argument
    ``False`` -- NOTE(review): meaning of that flag is not visible here,
    confirm against ``bigBitField``) and encoded with single-word states
    starting from ``config.startSymbol``.

    Args:
        data: input passed to ``bigBitField.BigBitField``.
        markovChain: chain passed to ``fixedSizeCode.encodeBitsToWordList``.
    """
    # Single-argument print(...) produces the same output under Python 2 and
    # is also valid Python 3 syntax.
    print("---")
    print("input: " + repr(data))
    miniBF = bigBitField.BigBitField(data, False)
    print("output: " + repr(
        fixedSizeCode.encodeBitsToWordList(miniBF, markovChain,
                                           config.startSymbol, 1)))
# Example #3
# 0
def decodeWordListToBits(words,
                         maxDigits,
                         markovChain,
                         previousWord=config.startSymbol,
                         wordsPerState=1):
    """Decode a sequence of words into a bit field of up to ``maxDigits`` bits.

    Each word narrows the current bit range via ``decodeWordToBitsRange``;
    the bits shared by both ends of the range are then moved into the
    output field. Decoding stops when the field holds ``maxDigits`` bits or
    the words run out.

    Args:
        words: iterable of words to decode.
        maxDigits: number of bits at which decoding stops.
        markovChain: chain converted with ``utils.markovChainToDictionary``.
        previousWord: state preceding the first word (a bigram tuple is
            indexed when ``wordsPerState`` is 2).
        wordsPerState: 1 for single-word states, 2 for bigram states.

    Returns:
        A ``(bitsField, wordsUsed)`` tuple: the decoded field and how many
        words were consumed to fill it.
    """
    currentRange = ["0", "1"]
    decoded = bigBitField.BigBitField()
    wordsUsed = 0
    chainLookup = utils.markovChainToDictionary(markovChain)
    lastStatus = time.time()
    statusInterval = 20  # seconds between progress messages

    for wordsUsed, word in enumerate(words, 1):
        currentRange = decodeWordToBitsRange(
            word, previousWord, chainLookup,
            maxDigits - decoded.totalFieldLen(), currentRange)

        # state (word or bigram) to use for the next word
        if wordsPerState == 1:
            previousWord = word
        elif wordsPerState == 2:
            if word == config.startSymbol:
                previousWord = (config.startSymbol, config.startSymbol)
            else:
                previousWord = (previousWord[1], word)

        # strip the prefix common to both range ends and store it, never
        # storing more bits than are still missing from the field
        reducedRange = utils.removeCommonBitsInRange(currentRange)
        commonLen = min(len(currentRange[0]) - len(reducedRange[0]),
                        maxDigits - decoded.totalFieldLen())
        decoded.pushQueueNBits(currentRange[0][:commonLen])
        currentRange = reducedRange

        if time.time() - lastStatus > statusInterval:
            print(" - decoded bits so far: " + repr(decoded.totalFieldLen()))
            lastStatus = time.time()

        # stop as soon as the field is complete ...
        decodedLen = decoded.totalFieldLen()
        if decodedLen == maxDigits:
            break
        # ... or when a single bit is missing and both range ends agree on it
        if (decodedLen == maxDigits - 1
                and currentRange[0][0] == currentRange[1][0]):
            decoded.pushQueueNBits(currentRange[0][0])
            break

    return (decoded, wordsUsed)
            break

    return words


if __name__ == '__main__':
    print "testing fixedSizeCode.py"

    testMarkov = config.testMarkov
    testMarkovDict = utils.markovChainToDictionary(testMarkov)

    testMarkov2 = config.testMarkov2
    testMarkovDict2 = utils.markovChainToDictionary(testMarkov2)

    # this is "01000110 01011010 11111111"
    testBitField = bigBitField.BigBitField([70, 90, 255])

    print "A:"
    print encodeBitsToWord(testBitField, ["0", "1"], config.startSymbol,
                           testMarkovDict) == ('A', ('00', '01'))
    print "A2:"
    print encodeBitsToWord(testBitField, ["0", "1"],
                           (config.startSymbol, config.startSymbol),
                           testMarkovDict2) == ('A', ('00', '01'))
    print "B:"
    print encodeBitsToWord(testBitField, ["0", "1"], "A",
                           testMarkovDict) == ('B', ('01', '01'))
    print "B2:"
    print encodeBitsToWord(testBitField, ["0", "1"], (config.startSymbol, "A"),
                           testMarkovDict2) == ('B', ('01', '01'))
    print "C:"