def encodeDataToWordList(data, bytesForSize, markovChain, wordsPerState=1):
    """Encode a length header for ``data``, then ``data`` itself, as words.

    The header is ``len(data)`` converted by ``utils.convertNumberToByteList``
    with ``bytesForSize``; it is encoded starting from the start-symbol state.
    The payload is then encoded starting from the state the header left
    behind (its last word, or its last two words when ``wordsPerState`` is 2).

    Args:
        data: payload; ``len(data)`` is taken and ``data`` is handed to
            ``bigBitField.BigBitField`` (presumably a list of byte values --
            TODO confirm against BigBitField).
        bytesForSize: forwarded to ``utils.convertNumberToByteList`` as the
            size of the length header.
        markovChain: forwarded unchanged to
            ``fixedSizeCode.encodeBitsToWordList``.
        wordsPerState: 1 (state is a single word) or 2 (state is a pair of
            words).

    Returns:
        The header words followed by the payload words (joined with ``+``).

    Raises:
        ValueError: if ``wordsPerState`` is neither 1 nor 2. Previously this
            surfaced later as an UnboundLocalError on ``lastWord``.
    """
    # Fail early and explicitly: no state shape is defined for other values.
    if wordsPerState not in (1, 2):
        raise ValueError(
            "wordsPerState must be 1 or 2, got " + repr(wordsPerState))

    start = config.startSymbol

    # encode the data length first
    lenData = utils.convertNumberToByteList(len(data), bytesForSize)
    if wordsPerState == 1:
        lastWord = start
    else:
        lastWord = (start, start)
    lenDataCode = fixedSizeCode.encodeBitsToWordList(
        bigBitField.BigBitField(lenData), markovChain, lastWord,
        wordsPerState)

    # compute last word (or bigram) so the payload continues from the state
    # reached at the end of the header
    # NOTE(review): lenDataCode[-1] assumes the header produced at least one
    # word -- confirm encodeBitsToWordList never returns an empty sequence.
    if wordsPerState == 1:
        lastWord = lenDataCode[-1]
    else:
        if len(lenDataCode) <= 1:
            # only one header word: pair it with the start symbol
            lastWord = (start, lenDataCode[-1])
        else:
            lastWord = (lenDataCode[-2], lenDataCode[-1])

        # a start symbol as the latest word resets the bigram state
        if lastWord[1] == start:
            lastWord = (start, start)

    # encode the actual message
    mainDataCode = fixedSizeCode.encodeBitsToWordList(
        bigBitField.BigBitField(data), markovChain, lastWord, wordsPerState)

    return lenDataCode + mainDataCode
def testExample(data, markovChain):
    """Print ``data`` and the word list it encodes to, for manual checking.

    Builds a ``bigBitField.BigBitField`` from ``data`` (second constructor
    argument ``False``; its meaning is defined by BigBitField, not here) and
    encodes it with ``fixedSizeCode.encodeBitsToWordList`` starting from
    ``config.startSymbol`` with one word per state.

    Args:
        data: value handed to ``BigBitField``; also shown via ``repr``.
        markovChain: forwarded unchanged to ``encodeBitsToWordList``.

    Returns:
        None; the result is only printed.
    """
    # Single-argument parenthesized print emits the same text as the
    # statement form.
    print("---")
    print("input: " + repr(data))

    bitField = bigBitField.BigBitField(data, False)
    encodedWords = fixedSizeCode.encodeBitsToWordList(
        bitField, markovChain, config.startSymbol, 1)
    print("output: " + repr(encodedWords))
def decodeWordListToBits(words, maxDigits, markovChain,
                         previousWord=config.startSymbol, wordsPerState=1):
    """Decode words back into bits until ``maxDigits`` bits are collected.

    Each word narrows a two-ended bit range via ``decodeWordToBitsRange``;
    the bits removed by ``utils.removeCommonBitsInRange`` are appended to the
    output field. Decoding stops as soon as the field holds ``maxDigits``
    bits, or when the words run out.

    Args:
        words: iterable of words to decode, consumed in order.
        maxDigits: number of bits to collect before stopping.
        markovChain: converted once with ``utils.markovChainToDictionary``.
        previousWord: state preceding the first word (a single word, or a
            pair when ``wordsPerState`` is 2).
        wordsPerState: 1 or 2; any other value leaves the state unchanged
            between words.

    Returns:
        A tuple ``(bitsField, wordsUsed)``: the ``BigBitField`` holding the
        decoded bits and the count of words consumed.
    """
    statusIntervalSeconds = 20

    decoded = bigBitField.BigBitField()
    chainDict = utils.markovChainToDictionary(markovChain)
    lastStatusTime = time.time()
    currentRange = ["0", "1"]

    wordsUsed = 0
    for wordsUsed, word in enumerate(words, 1):
        digitsLeft = maxDigits - decoded.totalFieldLen()
        currentRange = decodeWordToBitsRange(
            word, previousWord, chainDict, digitsLeft, currentRange)

        # advance the state (word or bigram) for the next iteration
        if wordsPerState == 1:
            previousWord = word
        elif wordsPerState == 2:
            if word == config.startSymbol:
                # a start symbol resets the bigram state
                previousWord = (config.startSymbol, config.startSymbol)
            else:
                previousWord = (previousWord[1], word)

        # simplify the range; the bits dropped from it are now decided and
        # move into the field, never exceeding maxDigits in total
        narrowedRange = utils.removeCommonBitsInRange(currentRange)
        decidedLen = len(currentRange[0]) - len(narrowedRange[0])
        decidedLen = min(decidedLen, maxDigits - decoded.totalFieldLen())
        decoded.pushQueueNBits(currentRange[0][:decidedLen])
        currentRange = narrowedRange

        # periodic progress report for long inputs
        if time.time() - lastStatusTime > statusIntervalSeconds:
            print(" - decoded bits so far: " + repr(decoded.totalFieldLen()))
            lastStatusTime = time.time()

        # we exit when our range describes only one number
        decodedLen = decoded.totalFieldLen()
        if decodedLen == maxDigits:
            break
        oneBitShort = decodedLen == maxDigits - 1
        if oneBitShort and currentRange[0][0] == currentRange[1][0]:
            # both ends agree on the final missing bit: take it and stop
            decoded.pushQueueNBits(currentRange[0][0])
            break

    return decoded, wordsUsed
break return words if __name__ == '__main__': print "testing fixedSizeCode.py" testMarkov = config.testMarkov testMarkovDict = utils.markovChainToDictionary(testMarkov) testMarkov2 = config.testMarkov2 testMarkovDict2 = utils.markovChainToDictionary(testMarkov2) # this is "01000110 01011010 11111111" testBitField = bigBitField.BigBitField([70, 90, 255]) print "A:" print encodeBitsToWord(testBitField, ["0", "1"], config.startSymbol, testMarkovDict) == ('A', ('00', '01')) print "A2:" print encodeBitsToWord(testBitField, ["0", "1"], (config.startSymbol, config.startSymbol), testMarkovDict2) == ('A', ('00', '01')) print "B:" print encodeBitsToWord(testBitField, ["0", "1"], "A", testMarkovDict) == ('B', ('01', '01')) print "B2:" print encodeBitsToWord(testBitField, ["0", "1"], (config.startSymbol, "A"), testMarkovDict2) == ('B', ('01', '01')) print "C:"