Beispiel #1
0
def decodeTextFile(filePath, outputFilePath='/'):
    """Decompress the file at *filePath* and write the text to *outputFilePath*.

    The input is read as raw bytes and processed as one long string of bits.
    Two modes alternate:

    * Text mode (the initial mode): the stream is consumed 8 bits at a time.
      Ordinary bytes are collected and later decoded as UTF-8 text.
      A Shift Out byte (0x0E) flushes the collected text and switches to
      bit mode. A Shift In byte (0x0F) hands the collected bit stream to
      ``decodeBinStream`` together with the dictionary from
      ``getNgramDict`` and appends the result to the output.
    * Bit mode: a 4-bit header is read. ``0000`` ends bit mode and skips the
      padding bits that round the bit-mode data up to a multiple of 8.
      Any other header is followed by a payload whose bit length is the
      header's value (18 when the header is ``1111``); header and payload
      are appended to the pending bit stream.

    After decoding, trailing whitespace is stripped from every line and the
    result is written as UTF-8.

    Args:
        filePath: Path of the compressed input file.
        outputFilePath: Destination path. The default ``'/'`` is a sentinel
            meaning "same path as *filePath* with the extension replaced by
            ``.txt``".

    Returns:
        None. The decoded text is written to disk and a confirmation line is
        printed to stdout.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        UnicodeDecodeError: If a run of text-mode bytes is not valid UTF-8.
    """
    if outputFilePath == '/':
        outputFilePath = os.path.splitext(filePath)[0] + '.txt'

    # Read everything up front so the handle is closed right away.
    with open(filePath, "rb") as bfileReader:
        fileBytes = bfileReader.read()

    ngramDict = getNgramDict()

    SO = 0x0E  # Shift Out: start of a bit-packed section
    SI = 0x0F  # Shift In: bit-packed section is complete, decode it

    fileBits = BitArray(fileBytes).bin
    totalBits = len(fileBits)
    # A cursor into fileBits; advancing it avoids re-copying the remaining
    # bit string on every step, which made the loop quadratic.
    pos = 0

    bitsReader = False
    charStream = ''
    binStream = ''
    decoded = ''
    while pos < totalBits:
        if bitsReader:
            firstBits = fileBits[pos:pos + 4]
            pos += 4
            if firstBits == '0000':
                # Terminator: skip the padding that follows it so that the
                # next text-mode read starts on a multiple of 8 bits.
                bStreamLen = len(binStream) + 4
                skip = 0 if bStreamLen % 8 == 0 else 8 - (bStreamLen % 8)
                pos += skip
                bitsReader = False
            else:
                size = 18 if firstBits == '1111' else int(firstBits, 2)
                binStream += firstBits + fileBits[pos:pos + size]
                pos += size
            continue

        byteBits = fileBits[pos:pos + 8]
        pos += 8
        code = int(byteBits, 2)

        if code == SO:
            # TODO: implement logger
            decoded += _decodeCharStream(charStream)
            charStream = ''
            bitsReader = True
        elif code == SI:
            # TODO: implement logger
            decoded += decodeBinStream(binStream, ngramDict)
            binStream = ''
            bitsReader = False

            # Peek at the next byte without consuming it. When it is ',' or
            # '.', drop the last decoded character (presumably a trailing
            # space emitted by decodeBinStream - TODO confirm). The emptiness
            # check matters: SI may be the very last byte of the file, and
            # int('', 2) would raise ValueError.
            nextBits = fileBits[pos:pos + 8]
            if nextBits and chr(int(nextBits, 2)) in (',', '.'):
                decoded = decoded[:-1]
        else:
            charStream += byteBits

    # NOTE: only pending text is flushed here; bits still held in binStream
    # when the input ends (no closing SI byte) are not decoded.
    if charStream:
        # TODO: implement logger
        decoded += _decodeCharStream(charStream)

    # Strip trailing whitespace per line; joining with '\n' leaves no
    # newline after the final line.
    decoded = '\n'.join(line.rstrip() for line in decoded.splitlines())

    with open(outputFilePath, "w", encoding="utf-8") as decodedFile:
        decodedFile.write(decoded)
    print("File was decompressed.")


def _decodeCharStream(charStream):
    """Convert a string of '0'/'1' characters to bytes and decode as UTF-8."""
    return BitArray(bin=charStream).bytes.decode(errors='strict')