Example #1
    def encode(self, outfile):
        # calculate frequencies
        for char in self.text:
            self.frequencies[char] += 1

        # construct huffman tree
        for k, v in self.frequencies.items():
            self.pq.put(Leaf(k, v))
        for _ in range(self.pq.qsize() - 1):
            min1 = self.pq.get()
            min2 = self.pq.get()
            new_node = Branch(min1, min2)
            self.pq.put(new_node)

        tree = self.pq.get()

        # put encodings into a dict for each char, and flip for decodings
        self._get_encodings(tree)
        self.decodings = {str(v.bin): k for k, v in self.encodings.items()}

        # generate output bit array
        bits_out = BitArray(bin="")
        for char in self.text:
            bits_out.append(self.encodings[char])

        with open(outfile, 'wb') as f:
            bits_out.tofile(f)

        return bits_out
Example #2
def print_binary_map(taint_dict, binary_map_and_size):
    """
    The function creates and saves the binary map of bytes that are tainted from the source.
    The bit[x] means that byte[x] from input file is tainted.
    :param taint_dict: Taint dictionary
    :param binary_map_and_size: Binary file and size - name:size
    :return: None
    """
    assert taint_dict
    if not binary_map_and_size:
        return

    [file_name, size] = binary_map_and_size.split(':')
    if int(size) < 0:
        logger.warning("The size value is negative")
        return
    bit_map = BitArray(int(size))  # renamed from "map" to avoid shadowing the builtin
    bit_map.set(0)

    for key in taint_dict:
        for value in range(taint_dict[key]):
            bit_map.set(1, key + value)

    with open(file_name, "wb") as binary_map:
        bit_map.tofile(binary_map)
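A minimal read-back sketch (hypothetical helper name, assuming the map was written by the function above) showing how the saved bitmap can be loaded with bitstring and queried for a given byte offset:

from bitstring import Bits

def is_byte_tainted(map_file, byte_index):
    # bit[x] of the map corresponds to byte[x] of the traced input file
    bits = Bits(filename=map_file)
    return bool(bits[byte_index])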
Example #3
def main():
    file = open(sys.argv[1], "rb")
    msg = ConstBitStream(file)

    s_in = BitArray()
    keys = {Bits(''): 0}
    s_out = BitArray()
    count = 1
    n_bits = 0
    while True:
        try:
            s_in.append(msg.read(1))
        except ReadError:
            break

        # If the word is not yet in the dictionary
        if Bits(s_in) not in keys:
            # x = yb
            y = s_in[:-1]
            b = s_in[-1:]

            pos = keys[Bits(y)]

            # number of bits for the dictionary index: ceil(log2(|keys|))
            n_bits = ceil(log2(len(keys)))

            if n_bits != 0:
                prefix = Bits(uint=int(pos), length=n_bits)
            else:
                prefix = Bits('')

            s_out.append(Bits('0b' + str(prefix.bin) + str(b.bin)))

            keys[Bits(s_in)] = count
            count += 1
            s_in.clear()

    # Add padding: 00000 10101
    # The length of the padding run equals the number of leftover bits, so the decompressor can recover them later
    if s_in[:1].bin == '1':
        z = Bits('0b' + '0' * len(s_in))
    else:
        z = Bits('0b' + '1' * len(s_in))

    s_in.reverse()
    s_out.reverse()

    s_out.append(s_in)
    s_out.append(z)

    s_out.reverse()

    with open(sys.argv[2], 'wb') as f_out:
        s_out.tofile(f_out)

    file.close()
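A small standalone check (made-up values) of the index-encoding step used above, where each dictionary index is written with ceil(log2(|keys|)) bits:

from math import ceil, log2
from bitstring import Bits

n_keys = 5
pos = 3
n_bits = ceil(log2(n_keys))               # 3 bits cover indices 0..4
print(Bits(uint=pos, length=n_bits).bin)  # 011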
Example #4
def output(tree: list, file_name: str):
    """Takes the tree values and uses them to create the output.bin file"""
    file2 = open(file_name[:-3] + "bin", "wb")
    codes = ""
    for item in tree:
        #stores the character (or '?' for multi-byte characters), then its code length
        if len(item['character'].encode('utf8')) > 1:
            file2.write(bytes("?".encode('utf8')))
        else:
            file2.write(bytes(item['character'].encode('utf8')))

        length = len(item['binary code'])
        length = BitArray(bin=(format(length, '08b')))
        length.tofile(file2)

        #stores all the codes in one long line
        codes += item['binary code']

    #2 bytes of all 1's followed by a newline to represent the end of the section
    clear = BitArray(bin='11111111')
    for i in range(2):
        clear.tofile(file2)
    file2.write(bytes("\n".encode('utf8')))

    #writes all the characters codes to the file in one long string
    codes = BitArray(bin=codes)
    codes.tofile(file2)

    #writes a newline character to begin the next section
    file2.write(bytes("\n".encode('utf8')))

    #writes the actual data in the huffman format
    file1 = open(file_name, "r", encoding='utf-8')
    value = ""
    for line in file1:
        for char in line:
            pos = binary_search(tree, char, 0, len(tree) - 1, "character")
            value += tree[pos]['binary code']

            #only writes in lots of 8 bits to reduce the amount of wasted bits
            if len(value) % 8 == 0:
                value = BitArray(bin=value)
                value.tofile(file2)
                value = ""

    value = BitArray(bin=value)
    value.tofile(file2)

    file1.close()
    file2.close()
Example #5
def compression():
    """
    This function is formatted to be used when compressing
    the text file. It makes use of the returned values in the following
    functions to get a correctly compressed file:
    --> count_frequencies function
    --> creation_of_huffmantree function
    --> encoded_text function
    --> padding_text function
    All of the above
    """

    path = config_json["filepath_text"]
    # the splitext method is used to split the path name into a pair root and ext.
    # ext being the extension(of the filepath) and root being everything except extension part
    filename, file_extension = os.path.splitext(path)
    # command to create filepath for the binary file
    output_path = filename + ".bin"

    # opens and reads the file
    # uses encoding parameter to open file and read it in any possible language
    with open(path, "r", encoding="utf8") as file, open(output_path,
                                                        "wb") as output:
        # uses read method that returns the specified number of bytes from the file
        # default is -1 (the whole file)
        text = file.read()
        # uses the rstrip method to remove trailing whitespace at the end of the string
        text = text.rstrip()
        # dictionary mapping each character to its code
        d = {}
        frequency = count_frequencies(text)
        g = creation_of_huffmantree(frequency)

        # loops through each frequency element from dictionary
        for el in g:
            d[el[0]] = el[1]

        # uses maketrans method to create a mapping table
        table = text.maketrans(d)
        # uses translate method to return a string where some specified characters are replaced
        # characters are replaced with characters described above using mapping table
        encoded_text = text.translate(table)
        padded_encoded_text = padding_text(encoded_text)
        # saves bytes to a binary file
        b = BitArray(bin=padded_encoded_text)
        b.tofile(output)

    # returns compressed file
    return output_path
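A toy illustration (with made-up codes) of the maketrans/translate step used above to replace every character by its code string:

codes = {"a": "0", "b": "10", "c": "11"}
table = str.maketrans(codes)
print("abcab".translate(table))  # 01011010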
Example #6
    def cipher(file_in, file_out, key, init_vector):
        """
        @brief      Mode PCBC chiffrement: chiffre le fichier avec la fonction
                    de chiffrement, la clé et le vecteur initial passé en
                    paramètre dans un autre fichier

        @:param      file_in      Le fichier d'entrée
        @:param      file_out     Le fichier de sortie
        @:param      key          La clé en bytes
        @:param      init_vector  Le vecteur initial

        """
        init_vector = BitArray(init_vector)
        print("vector init : ", init_vector)
        init_vector.tofile(open("vector.txt", "wb+"))
        with open(file_in, 'rb') as f:
            message = BitArray(bytes=f.read())
            mod = len(message) % 128
            if mod != 0:
                message.append(128 - mod)
            print("message_cipher :", message)
            sortie = open(file_out, 'wb+')
            temp = int(len(message) / 128)
            last_bytes = init_vector
            if temp != 1:
                x = 0
                sort = BitArray()
                while x != temp:
                    print("x:", x)
                    chunk = message[128 * x:(128 * (x + 1))]
                    print("chunk:", chunk)
                    chunk_xor = chunk ^ last_bytes
                    print("encrypt:", chunk_xor)
                    last_bytes = cam.encryption(chunk_xor, key)
                    print("function encryption, cipher:", last_bytes)
                    sort.append(last_bytes)
                    last_bytes = chunk ^ last_bytes
                    x = x + 1
                sort.tofile(sortie)
            else:
                print("There is no enough blocks to apply PCBC mode.")
            sortie.close()
        f.close()
Example #7
def encode():
    # Call functions to create tree, then uses on data to create and save a binary encoded version
    print('encoding...')
    # Read input data file
    # load file and convert to string
    file = open(txtType, "r")
    fileString = file.read()
    file.close()

    data = fileString
    # Loops through characters to calculate frequencies
    frequencies = defaultdict(int)
    for character in data:
        frequencies[character] += 1
    # Uses frequencies to create canonical tree for encoding
    encoded = createTree(frequencies)
    encoded = canonical(encoded)
    keyForDecode = []
    # Adds character-length pairs to front of binary string to be used for canonical decoding
    for x in encoded:
        keyForDecode.append(bin(len(x[1]))[2:].zfill(8))
        keyForDecode.append(bin(ord(x[0]))[2:].zfill(8))
    binary = ''
    for i in keyForDecode:
        binary += i
    # Adds '}' to mark the end of the key
    binary += bin(ord('}'))[2:].zfill(8)
    # Adds encoded string
    binary += createCode(encoded, data)
    dif = len(binary) % 8
    if dif == 0:
        dif = 8
    zeroes = 8 - dif
    # Adds number of zeroes to the front which will need to be removed from the end (because encoded in 8-bit chunks)
    binary = bin(ord(str(zeroes)))[2:].zfill(8) + binary
    output = BitArray(bin=binary)

    # Write output to file .hc
    file2 = open(hcType, "wb")
    output.tofile(file2)
    file2.close()
    print('encoded time: ', clock())
Example #8
    def decipher(file_in, file_out, key, init_vector):
        """
        @brief      Mode PCBC déchiffrement: déchiffre le fichier avec la
                    fonction de chiffrement, la clé et le vecteur initial
                    passé en paramètre dans un autre fichier

        @:param      file_in      Le fichier d'entrée
        @:param      file_out     Le fichier de sortie
        @:param      key          Camellia Key Object
        @:param      init_vector  Le vecteur initial

        """
        vector = open(init_vector, 'rb')
        init_vector = BitArray(vector)
        print("vector d'initialisation :", init_vector)
        if len(init_vector) != 128:
            raise ValueError("init_vector must be 128 bits.")
        with open(file_in, 'rb') as f:
            message = BitArray(bytes=f.read())
            last_chunk = init_vector
            sortie = open(file_out, 'wb+')
            temp = int(len(message) / 128)
            if temp != 1:
                x = 0
                sort = BitArray()
                while x != temp:
                    print("x:", x)
                    chunk = message[128 * x:(128 * (x + 1))]
                    print("chunk:", chunk)
                    chunk_deciph = cam.decryption(chunk, key, True)
                    print("decript:", chunk_deciph)
                    chunk_deciph ^= last_chunk
                    print("function decryption, decipher:", chunk_deciph)
                    sort.append(chunk_deciph)
                    print("sortie :", sort)
                    last_chunk = chunk ^ chunk_deciph
                    x += 1
                sort.tofile(sortie)  # write once, after all blocks are processed
            else:
                print("There are not enough blocks to apply PCBC mode.")
            sortie.close()
        f.close()
Example #9
    def createCompressText(self, filename, root) -> None:
        """
        This function opens a text and uses the codes created to encode the tree
        it then write the bytes to a bin file
        :return: VOID
        """

        code = self.getCodes()
        temp = []

        with open("books/" + filename + ".txt", "r", encoding="utf-8") as text:
            converted_file = text.read()
            for char in converted_file:
                temp.append(self.codes[char])
            string = "".join(temp)

        binary_string = BitArray(bin=string)
        with open("output/" + filename + ".bin", "wb") as newFile:
            pickle.dump(root, newFile)
            binary_string.tofile(newFile)
        newFile.close()
Example #10
def ECB(function, file_in, file_out, key):
    """
    @brief      Mode ECB: chiffre/déchiffre le fichier avec la fonction de
                chiffrement et la clé passée en paramètre dans un autre fichier

    @:param      file_in     L'adresse du fichier d'entrée
    @:param      file_out    l'adresse du fichier de sortie
    @:param      chunk_size  La taille du bloc en bytes
    @:param      key         La clé en bytes

    """
    with open(file_in, 'rb') as f:
        message = BitArray(bytes=f.read())
        mod = len(message) % 128
        if mod != 0:
            message.append(128 - mod)
        sortie = open(file_out, 'wb+')
        temp = int(len(message) / 128)
        if temp != 1:
            x = 0
            sort = BitArray()
            while x != temp:
                print("x:", x)
                chunk = message[128 * x:(128 * (x + 1))]
                print("chunk:", chunk)
                if function == cam.decryption:
                    cipher = function(chunk, key, True)
                    print("function decryption, decipher:", cipher)
                else:
                    cipher = function(chunk, key)
                    print("function encryption, cipher:", cipher)
                sort.append(cipher)
                print("sortie :", sort)
                x = x + 1
            sort.tofile(sortie)
        else:
            print("There is no enough blocks to apply ECB mode.")
        sortie.close()
    f.close()
Example #11
from PIL import Image
from bitstring import BitArray
import sys
im = Image.open(sys.argv[1])
pix = im.load()
w = im.size[0]
h = im.size[1]

s = BitArray(bytearray([w, h]))

bits = []
for j in range(h):
    for i in range(w):
        if len(bits) == 8:
            s += bits
            bits = []
        if pix[i, j] == 1:
            bits.insert(0, 0)
        else:
            bits.insert(0, 1)
    # pad the row out to a full byte
    dif = 8 - len(bits)
    for x in range(dif):
        bits.insert(0, 0)

# flush the final padded byte before writing
if bits:
    s += bits

f = open('output.rgf', 'wb')
s.tofile(f)
f.close()

print s.hex
Example #12
# index of minimum packet size in File Properties header
i_min_data_pkt_size = index[0] + 736

print "[*] Original Minimum Data Packet Size: %s" % fb[
    i_min_data_pkt_size:i_min_data_pkt_size + 32].hex
print "[*] Original Maximum Data Packet Size: %s" % fb[i_min_data_pkt_size +
                                                       32:i_min_data_pkt_size +
                                                       64].hex

# According to the ASF standard the minimum data size and the maximum data size should be equal
print "[*] Changing Minimum and Maximum Data packet size to 0"

# changing the data packets in bit array

fb[i_min_data_pkt_size:i_min_data_pkt_size + 8] = 0x00
fb[i_min_data_pkt_size + 8:i_min_data_pkt_size + 16] = 0x00
fb[i_min_data_pkt_size + 16:i_min_data_pkt_size + 24] = 0x00
fb[i_min_data_pkt_size + 24:i_min_data_pkt_size + 32] = 0x00
fb[i_min_data_pkt_size + 32:i_min_data_pkt_size + 40] = 0x00
fb[i_min_data_pkt_size + 40:i_min_data_pkt_size + 48] = 0x00
fb[i_min_data_pkt_size + 48:i_min_data_pkt_size + 56] = 0x00
fb[i_min_data_pkt_size + 56:i_min_data_pkt_size + 64] = 0x00

print "[*] POC File Created poc.asf"

of = open('poc.asf', 'w+b')
fb.tofile(of)
of.close()
f.close()
Example #13
# -*- coding: utf-8 -*-
#
# Python 2.7.1
#

from bitstring import BitArray

fname = 'image.jpg'

with open(fname, 'r+b') as fh:
    byte_map = [ord(b) for b in fh.read(4)]
    byte_list = [byte_map[0], byte_map[1], byte_map[2], byte_map[3]]
    print 'retrieved', len(byte_list), 'from file', fname
    offset = 0
    for ascii_val in byte_list:
        bin_val = BitArray(uint=ascii_val, length=8)  # always a full byte, even for values below 0x10
        print bin_val.bin
        BitArray.reverse(bin_val)
        print bin_val.bin
        fh.seek(offset)
        bin_val.tofile(fh)
        print 'writing offset', offset, 'of file', fname
        offset += 1

fh.close()
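For reference, a minimal standalone demonstration (not tied to the image file above) of the in-place bit reversal that BitArray.reverse performs:

from bitstring import BitArray

b = BitArray(uint=0x41, length=8)  # 01000001
b.reverse()
print(b.bin)                       # 10000010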
Example #14
for i in range (0,len(mycharset)):
    if countset[i] != 0: # skip characters that never appear
        print(mycharset[i] , " has " , '{0:04d}'.format(countset[i]) , " times appeared. "+\
              "Probability = " , '{:.10f}'.format(probabilityset[i]) + " Huffman: " + mycodebook[str(mycharset[i])]) # just a print out operation

onesandzeros = "" # initiate bit array

for i in range (0, len(allofthefile)):
    onesandzeros = onesandzeros + mycodebook[str(allofthefile[i])] # create ones and zeros array

binary_file = open('compressed_foo.bin', 'wb') # open the binary compressed file for writing

i = 0
while (i < len(onesandzeros)):
    b = BitArray(bin=onesandzeros[i:i+8]) # divide array with 8 many bits and make them into a byte
    b.tofile(binary_file) # write the calculated byte to file
    i = i+8

binary_file.close()

binary_file = open('compressed_foo.bin', "rb") # open the binary compressed file for reading
allofthebinaryfile = binary_file.read() # read all of the bytes in the compressed file
binary_file.close()

newonesandzeros = "" # initiate new bit sequance to decompression of the file

for i in range (0, len(allofthebinaryfile)):
    newonesandzeros = newonesandzeros + str(bin(allofthebinaryfile[i])[2:].zfill(8)) # transform bytes into bit array

mynewfile = "" # initiate character array
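Note that BitArray.tofile pads a final partial byte with zero bits; a minimal check of that behaviour (written to an in-memory buffer rather than a real file):

import io
from bitstring import BitArray

buf = io.BytesIO()
BitArray(bin="101").tofile(buf)  # padded to a whole byte: 10100000
print(buf.getvalue())            # b'\xa0'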
Example #15
File: DataIO.py Project: sultan86/biolzma
def SaveBinStrData(binStr, fileName):
    binData = BitArray(bin=binStr)
    try:
        with open(fileName, 'wb') as outFile:
            binData.tofile(outFile)
    except IOError:
        GlobalMsg.warn('unable to open file [' + fileName + ']')
Example #16
print 'Analyzing file'
charWeights = countChars(sourceFile)
print '\n'

#Create symbol table based on file analysis
symbolTable = PrefixTree(charWeights).getEncodeTable()

#Initialize output bits with count of chars
outputBits = BitArray(int=len(charWeights), length=8)

#Add char weights to binary output
for char in charWeights.keys():
  outputBits.append(BitArray(int=ord(char), length=32))
  outputBits.append(BitArray(int=charWeights[char], length=32))

#Add code words
print 'Encoding file'
line = 0
sourceFile = open(sys.argv[1], 'r')
for s_line in sourceFile:
  line += 1
  update_progress(line, lineCount)
  for s_char in s_line:
    outputBits.append(BitArray('0b{}'.format(symbolTable[s_char])))
print '\n'

#Write compressed binary to file
outFile = open('{}.hc'.format(sys.argv[1]), 'wb')
outputBits.tofile(outFile)
print 'Done.'
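A reading-side sketch (hypothetical file name, assuming the header layout written above: an 8-bit character count followed by 32-bit character/weight pairs) that recovers the weights:

from bitstring import ConstBitStream

stream = ConstBitStream(filename="input.txt.hc")  # hypothetical output of the script above
n_chars = stream.read("int:8")
char_weights = {}
for _ in range(n_chars):
    code_point = stream.read("int:32")
    weight = stream.read("int:32")
    char_weights[chr(code_point)] = weight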
Example #17
def main():
    file = open(sys.argv[1], "rb")
    msg = ConstBitStream(file)

    s_in = BitArray()
    aux = BitArray()
    keys = ['']
    s_out = ''

    s_in.append(msg.read(1))
    aux = s_in.copy()
    aux.invert(0)
    count = 0

    # Find the length of the padding
    while (s_in.bin) != aux.bin:
        try:
            count += 1
            s_in.clear()
            s_in.append(msg.read(1))

        except ReadError:
            break

    padding = BitArray()
    padding.append(s_in)

    s_in = BitArray()

    # With the length, read the corresponding padding bits
    padding.append(msg.read(count - 1))

    while True:
        n_bits = ceil(log2(len(keys)))

        try:
            s_in.append(msg.read(n_bits + 1))
        except ReadError:
            break

        y = s_in[:-1]
        b = s_in[-1:]

        if Bits(y) == Bits(''):
            pos = 0
        else:
            pos = y.uint

        s_out = s_out + (str(keys[pos]) + b.bin)

        keys.append(str(keys[pos]) + str(Bits(b).bin))

        s_in = BitArray()

    output = BitArray('0b' + s_out)
    output.append(padding)

    with open(sys.argv[2], 'wb') as f_out:
        output.tofile(f_out)

    file.close()
Example #18
def writeB(scene, b_name):
    #Sanitization checks
    #if no image size, error
    if scene.n_films == 0:
        print "Error: Scene needs a film."
        return

    #if no camera, error:
    if scene.n_cameras == 0:
        print "Error: Scene needs a camera."
        return

    #if no bounding box, error
    if scene.n_boundboxes == 0:
        print "Error: Scene needs a bounding box."
        return

    #Create bitstring
    s = BitArray()

    #Film
    film = scene.films[0]
    t = BitArray()
    t = bitstring.pack("3*int:32", 0, film['width'], film['height'])
    print t.unpack("3*int:32")
    s = s + t

    #Camera
    camera = scene.cameras[0]
    t = BitArray()
    t = bitstring.pack("int:32, 12*float:32", 1, camera['point'][0],
                       camera['point'][1], camera['point'][2],
                       camera['fieldOfView'], camera['toPoint'][0],
                       camera['toPoint'][1], camera['toPoint'][2],
                       camera['up'][0], camera['up'][1], camera['up'][2],
                       camera['lensRadius'], camera['focalDepth'])
    print t.unpack("int:32, 12*float:32")
    s = s + t

    #Lights
    for i in range(scene.n_lights):
        light = scene.lights[i]
        t = BitArray()
        t = bitstring.pack("2*int:32, 6*float:32", 2, light['type'],
                           light['point'][0], light['point'][1],
                           light['point'][2], light['color'][0],
                           light['color'][1], light['color'][2])
        print t.unpack("2*int:32, 6*float:32")
        s = s + t

    #Materials
    for i in range(scene.n_materials):
        mat = scene.materials[i]
        t = BitArray()
        t = bitstring.pack("int:32, 3*float:32, 2*int:32, 4*float:32", 3,
                           mat['color'][0], mat['color'][1], mat['color'][2],
                           mat['type'], mat['metal'], mat['specular'],
                           mat['lambert'], mat['ambient'], mat['exponent'])
        print t.unpack("int:32, 3*float:32, 2*int:32, 4*float:32")
        s = s + t

    #Spheres
    for i in range(scene.n_spheres):
        sphere = scene.spheres[i]
        t = BitArray()
        t = bitstring.pack("int:32, 4*float:32, int:32", 4, sphere['point'][0],
                           sphere['point'][1], sphere['point'][2],
                           sphere['radius'], sphere['materialIndex'])
        print t.unpack("int:32, 4*float:32, int:32")
        s = s + t

    #Triangles
    for i in range(scene.n_triangles):
        tri = scene.triangles[i]
        t = BitArray()
        t = bitstring.pack("int:32, 9*float:32, int:32", 5, tri['point1'][0],
                           tri['point1'][1], tri['point1'][2],
                           tri['point2'][0], tri['point2'][1],
                           tri['point2'][2], tri['point3'][0],
                           tri['point3'][1], tri['point3'][2],
                           tri['materialIndex'])
        print t.unpack("int:32, 9*float:32, int:32")
        s = s + t

    #Bounding Box
    box = scene.boundboxes[0]
    t = BitArray()
    t = bitstring.pack("int:32, 6*float:32", 6, box['min'][0], box['min'][1],
                       box['min'][2], box['max'][0], box['max'][1],
                       box['max'][2])
    print t.unpack("int:32, 6*float:32")
    s = s + t

    #Send end code
    t = BitArray()
    t = bitstring.pack("int:32", 7)
    s = s + t

    #Write to file
    with open(b_name, "wb") as f:
        s.tofile(f)
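A reading-side sketch (hypothetical, reusing the same pack format strings) showing how the first record written above could be unpacked again:

from bitstring import ConstBitStream

s = ConstBitStream(filename="scene.bin")      # hypothetical file written by writeB
code, width, height = s.readlist("3*int:32")  # the film record: 0, width, height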
Example #19
#Create the container for the hashbools
hashTable = BitArray(1000000)
modValue = 1000000

#read each line, split to remove extra chars.
for line in file.readlines():
    data = line.split()
    for line in data:
        #hash with the md5 hashfunction.
        hash = hashlib.md5(line.lower().encode('utf-8')).digest()
        #extract parts of the hash and convert to integer inside array index span
        h1 = int(hash[0:3].encode("hex"), 16) % modValue
        h2 = int(hash[4:7].encode("hex"), 16) % modValue
        h3 = int(hash[8:11].encode("hex"), 16) % modValue
        h4 = int(hash[12:15].encode("hex"), 16) % modValue
        #set the bits in the array. use the set function for speed.
        hashTable.set(True, h1)
        hashTable.set(True, h2)
        hashTable.set(True, h3)
        hashTable.set(True, h4)

file.close()

#open file as binary file and write to it.
with open('data', 'wb') as outfile:
    hashTable.tofile(outfile)

#print execution time. ~20 seconds on my system
end = time.time()
print(end - start)
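A lookup sketch for the saved filter (hypothetical, written for Python 3, using int.from_bytes in place of the Python 2 .encode("hex") trick; the hashing scheme itself matches the code above):

import hashlib
from bitstring import Bits

def might_contain(word, filename='data', mod_value=1000000):
    bits = Bits(filename=filename)
    digest = hashlib.md5(word.lower().encode('utf-8')).digest()
    positions = [int.from_bytes(part, 'big') % mod_value
                 for part in (digest[0:3], digest[4:7], digest[8:11], digest[12:15])]
    # all four bits set -> possibly present; any bit clear -> definitely absent
    return all(bits[p] for p in positions)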
Example #20
class Ps2Iso:
    def __init__(self, filename):
        self._set_logger()
        self.log.info(f"Loading {filename}, this may take a while...")
        #self.data = Bits(filename=filename)
        self.data = BitArray(filename=filename)
        self.pvd = PVD(self.data)
        self.block_size = self.pvd.logical_block_size

        if self.pvd.system_identifier != "PLAYSTATION":
            self.log.warning(
                (f"system_identifier: '{self.pvd.system_identifier}', "
                 "should be 'PLAYSTATION'"))
            self.log.warning(f"{filename} may not be a PS2 ISO file")
        if self.block_size != 2048:
            self.log.warning((f"logical_block_size: {self.block_size}, "
                              "should be 2048"))
            self.log.warning(f"{filename} may not be a PS2 ISO file")

        self.path_tables = PathTables(self.data, self.pvd)
        self.tree = self.path_tables.get_path_tree()

    def get_object(self, path):
        paths = path.split("/")
        if paths[0] == "":
            paths.pop(0)
        mark = self.tree
        for p in paths:
            mark = mark.get_child(p)
        return mark

    def get_blocks_allocated(self, path):
        obj = self.get_object(path)
        lba_list = self.get_lba_list()
        obj_idx = next(idx for idx, i in enumerate(lba_list) if i[1] == path)
        lba = lba_list[obj_idx][0]
        next_lba = lba_list[obj_idx + 1][0]
        return next_lba - lba

    def get_lba(self, path):
        return self.get_object(path).lba

    def replace_files(self, replacements, allow_move=False):
        paths = [path for path, _ in replacements]
        bins = [b for _, b in replacements]
        sizes = [len(b) // 8 for b in bins]
        blocks_required = [ceil(len(b) / 8 / self.block_size) for b in bins]
        curr_lba = [self.get_lba(p) for p in paths]
        curr_blocks_allocated = [self.get_blocks_allocated(p) for p in paths]

        items = [{
            "path": p,
            "bin": b,
            "size": s,
            "blocks_required": br,
            "curr_lba": cl,
            "curr_blocks_allocated": cb
        } for p, b, s, br, cl, cb in zip(paths, bins, sizes, blocks_required,
                                         curr_lba, curr_blocks_allocated)]

        overflows = []
        for i in items:
            if i["blocks_required"] > i["curr_blocks_allocated"]:
                overflows.append(i)
        for o in overflows:
            self.log.warning((f"{o['path']} (size: {o['size']} "
                              f"requires {o['blocks_required']} blocks, "
                              f"{o['curr_blocks_allocated']} available"))

        if overflows and not allow_move:
            raise ValueError("allow_move must be true to increase file sizes")

        for i in items:
            lba = i["curr_lba"]
            num_blocks = i["curr_blocks_allocated"]
            self.clear_blocks(lba, num_blocks)
        if not allow_move:
            for i in items:
                i["new_lba"] = i["curr_lba"]
                b = i["bin"]
                offset = i["curr_lba"] * self.block_size * 8
                self.data.overwrite(b, offset)
        else:
            raise NotImplementedError("Moving files is not supported yet")

        for i in items:
            self.update_toc(i["path"], i["new_lba"], i["size"])

    def update_toc(self, path, lba, size):
        self.get_object(path).update_toc(lba, size)

    def write(self, filename):
        with open(filename, "wb") as f:
            self.data.tofile(f)

    def clear_blocks(self, start_block, num_blocks):
        start_addr = start_block * self.block_size * 8
        end_addr = start_addr + num_blocks * self.block_size * 8
        self.data.set(0, range(start_addr, end_addr))

    def get_lba_list(self):
        root = self.tree
        lba_list = self._get_lba_list(root)
        lba_list = list(set(lba_list))
        return sorted(lba_list, key=lambda x: x[0])

    def _get_lba_list(self, item, lba_list=None):
        if lba_list is None:
            lba_list = []
        lba = item.lba
        path = item.path
        lba_list.append((lba, path))
        if isinstance(item, TreeFolder):
            for c in item.children:
                self._get_lba_list(c, lba_list=lba_list)
        return lba_list

    def _get_blocks(self, lba, blocks=None, size=None):
        if blocks and size is None:
            size = blocks * self.block_size
        if size is None:
            raise ValueError("blocks/size must be set")

    def _set_logger(self):
        self.log = logging.getLogger("Ps2Iso")
        handler = logging.StreamHandler()
        formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        self.log.addHandler(handler)
        self.log.setLevel(logging.INFO)
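A hypothetical usage sketch of the class above (the ISO name and file paths are made up):

from bitstring import BitArray

iso = Ps2Iso("game.iso")
print(iso.get_lba("/SYSTEM.CNF"))

with open("SYSTEM.CNF.new", "rb") as f:
    replacement = BitArray(bytes=f.read())
iso.replace_files([("/SYSTEM.CNF", replacement)])
iso.write("patched.iso")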
Example #21
File: bloom.py Project: tobyndax/Kata
modValue = 1000000


#read each line, split to remove extra chars.
for line in file.readlines():
    data = line.split()
    for line in data:
        #hash with the md5 hashfunction.
        hash = hashlib.md5(line.lower().encode('utf-8')).digest()
        #extract parts of the hash and convert to integer inside array index span 
        h1 = int(hash[0:3].encode("hex"),16)%modValue
        h2 = int(hash[4:7].encode("hex"),16)%modValue
        h3 = int(hash[8:11].encode("hex"),16)%modValue
        h4 = int(hash[12:15].encode("hex"),16)%modValue
        #set the bits in the array. use the set function for speed.
        hashTable.set(True,h1)
        hashTable.set(True,h2)
        hashTable.set(True,h3)
        hashTable.set(True,h4)

file.close()	

#open file as binary file and write to it.
with open('data', 'wb') as outfile:
    hashTable.tofile(outfile)

#print execution time. ~20 seconds on my system
end = time.time()
print(end-start)

Example #22
class TinyCompressor:
    """This class holds data and methods for 
    a data compressor using LEC Algorithm"""

    table = {}
    decode_table = {}
    eof = 0
    __first_data = []
    __previous_data = []
    __data = []
    __data_ns = []
    __decimal_places = []
    __strings_table = {}
    __compressed_data = BitArray()
    __compressed_data_string = ""
    __data_length = 0
    data_frequencies = {}

    __d_values = []
    codec = 0

    def __init__(self, decimal_places):
        self.__decimal_places = decimal_places[:]

    def set_strings_table(self, new_strings_table):
        self.__strings_table = new_strings_table

    def get_n(self, d_value):
        if d_value == 0:
            return 0
        return int(floor(log(abs(d_value),2))) + 1

    def get_a(self,d_value,n_value):
        if d_value == 0:
            return ""
        if d_value < 0:
            return BitArray(int=d_value-1, length=20).bin[(-1)*n_value:]
        if d_value > 0:
            return BitArray(int=d_value, length=20).bin[(-1)*n_value:]

    def generate_data_list(self,inputfilename):
        first = True
        self.__first_data = []
        self.__previous_data = []
        self.__data_ns = []
        self.__data = []
        self.data_frequencies = {}
        with open(inputfilename) as inputfile:
            for line in inputfile:
                linedata = line.split(",")
                self.__data_length = len(linedata)
                if (len(linedata) != len(self.__decimal_places)):
                    print "Length of decimal places different than length of data"
                    return #Should return an exception
                if first:
                    for i in range(len(linedata)):
                        self.__first_data.append(float(linedata[i]))
                    self.__previous_data = self.__first_data[:]
                    first = False
                else:
                    for i in range(len(linedata)):
                        value = (int(float(linedata[i]) * 10**self.__decimal_places[i]) - 
                                int(float(self.__previous_data[i]) * 10**self.__decimal_places[i]))
                        """if (i == 2):
                            print "Value =", value"""
                        self.__data.append(value)
                        self.__data_ns.append(self.get_n(value))
                    self.__previous_data = linedata[:]

        print "Data len =", len(self.__data)
        print "adding range MAX_DATA_N"
        self.__data_ns += range(MAX_DATA_N)



    def generate_table(self,inputfilename):
        self.generate_data_list(inputfilename)
        self.codec = HuffmanCodec.from_data(self.__data_ns)
        self.table = self.codec.get_code_table()

        self.__strings_table = {}
        for symbol in self.table.keys():
            if not type(symbol) is int:
                self.eof = symbol
            bitsize, value = self.table[symbol]
            self.__strings_table[symbol] = bin(value)[2:].rjust(bitsize, '0')

    def encode_data(self, inputfilename, outputfilename):
        self.generate_data_list(inputfilename)
        self.__compressed_data_string = ""

        try:
            for i in range(len(self.__data)):
                self.__compressed_data_string += \
                    self.__strings_table[self.__data_ns[i]] + \
                    self.get_a(self.__data[i], self.__data_ns[i]) 
        except KeyError:
            print "Not possible to encode data[{}] = {}".format(i, self.__data[i])
            return

        #Add EOF
        self.__compressed_data_string += self.__strings_table[self.eof]

        self.__compressed_data = BitArray(bin=self.__compressed_data_string)
        #print "Compressed data to file:", self.__compressed_data.bin

    def to_file(self, outputfilename):
        with open(outputfilename, 'wb') as outputfile:
            self.__compressed_data.tofile(outputfile)

    def build_values(self,inputfilename):
        print "Building values from", inputfilename

        compressed_bitarray = 0
        with open(inputfilename, 'rb') as compressedfile:
            compressed_bitarray = BitArray(compressedfile)

        #print "Compressed data from file:", compressed_bitarray.bin

        for k in self.__strings_table.keys():
            if (type(k) is int):
                self.decode_table[self.__strings_table[k]] = k
        possible_codes = set(self.decode_table.keys())

        #print "Decode table =", self.decode_table
        self.__d_values = []
        time_to_stop = False
        iteration = 0
        start_s = 0
        end_s = 1
        start_a = end_s
        end_a = 3
        n = 0
        s = 0
        a = 0

        while( not time_to_stop):
            if compressed_bitarray[start_s:end_s].bin in possible_codes:
                s = compressed_bitarray[start_s:end_s]
                n = self.decode_table[s.bin]
                start_a = end_s
                end_a = start_a + n # +1 ?


                if n == 0: #a = 0
                    self.__d_values.append(0)
                else:
                    a = compressed_bitarray[start_a:end_a]
                    if a[0]:
                        self.__d_values.append((OFFSET_ZERO+ a).int)
                    else:
                        self.__d_values.append((OFFSET_ONE+ a).int +1)
                start_s = end_a
            else:
                end_s += 1
            if end_s >= len(compressed_bitarray.bin):
                time_to_stop = True


    def decode_data(self,first_values, inputfilename, outputfilename):
        self.build_values(inputfilename)
        self.__values = []
        accumulator = first_values[:]
        print "len __d_values =", len(self.__d_values)
        """print "Data encoded =", self.__data
        print "Data decoded =", self.__d_values
        print "First values =", first_values"""
        """for i in range(len(self.__d_values)/len(accumulator)):
            self.__values.append(accumulator[:])
            for j in range(i, i*len(accumulator)+ len(accumulator)):
                print "(i,j) =",j-i*len(accumulator),j
                accumulator[j-i*len(accumulator)] += self.__d_values[j]"""
        self.__values.append(accumulator[:])
        for i in range(len(self.__d_values)):
            """if (i == 2):
                print "Value =", self.__d_values[i]"""
            """if((i%len(accumulator) == 1)):
                print self.__d_values[i]"""
            if self.__decimal_places[i%len(accumulator)] == 0:
                accumulator[i%len(accumulator)] += self.__d_values[i]
            else:
                accumulator[i%len(accumulator)] += float(self.__d_values[i]) \
                    / 10**self.__decimal_places[i%len(accumulator)]
            if ((i%len(accumulator)) == (len(accumulator)-1)):
                self.__values.append(accumulator[:])

        with open(outputfilename, 'wb') as outputfile:
            for value in self.__values:
                line = ",".join(
                    [("{:."+str(self.__decimal_places[i]) +"f}").format(float(value[i])) for i in range(len(value))])
                outputfile.write(line + '\n')
Example #23
index  =  fb.find('0xa1dcab8c47a9cf118ee400c00c205365',bytealigned=True)

print "[*] found file properties GUID"
print "[*] File properties GUID: %s" % fb[index[0]:(index[0]+128)]

# index of minimum packet size in File Properties header
i_min_data_pkt_size = index[0] +  736

print "[*] Original Minimum Data Packet Size: %s" % fb[i_min_data_pkt_size:i_min_data_pkt_size+32].hex
print "[*] Original Maximum Data Packet Size: %s" % fb[i_min_data_pkt_size+32:i_min_data_pkt_size+64].hex

# According to the ASF standard the minimum data size and the maximum data size should be equal
print "[*] Changing Minimum and Maximum Data packet size to 0"

# changing the data packets in bit array

fb[i_min_data_pkt_size:i_min_data_pkt_size+8] = 0x00
fb[i_min_data_pkt_size+8:i_min_data_pkt_size+16] = 0x00
fb[i_min_data_pkt_size+16:i_min_data_pkt_size+24] = 0x00
fb[i_min_data_pkt_size+24:i_min_data_pkt_size+32] = 0x00
fb[i_min_data_pkt_size+32:i_min_data_pkt_size+40] = 0x00
fb[i_min_data_pkt_size+40:i_min_data_pkt_size+48] = 0x00
fb[i_min_data_pkt_size+48:i_min_data_pkt_size+56] = 0x00
fb[i_min_data_pkt_size+56:i_min_data_pkt_size+64] = 0x00

print "[*] POC File Created poc.asf"

of = open('poc.asf','w+b')
fb.tofile(of)
of.close()
f.close()
Example #24
def save_best(data: BitArray):
    file = open(file=current_best, mode='wb')
    data.tofile(file)
    file.close()
Example #25
def compress(originalfile,
             treefile='treefile.json',
             compressedfile='compressedfile.bin'):
    """
  The compress function compresses the originalfile, and stores
  the tree as "treefile" and the compressed file as "compressedfile"

  """
    #creates a blank dictionary
    thecharacters = {}
    tree = {}
    #opens the text file with utf8 encoding as f
    with open(originalfile, encoding='utf8') as f:
        #loops until the file has ended
        while True:
            #reads the next character
            c = f.read(1)
            #if c is blank
            if not c:
                #the file has ended, so exit the loop
                break
            #if it appears in the dictionary
            if c in thecharacters:
                #add 1 to the counter of frequency
                thecharacters[c] += 1
            #if c doesn't appear in the dictionary
            if c not in thecharacters:
                #add the item to the dictionary
                thecharacters[c] = 1
    f.close()
    for key in thecharacters:
        tree[key[0]] = ''
    """
  create the tree
  smallest value gets assigned as 0 and 1
  old dictionary altered to reflect the combined values
  new dictionary created with the single elements and binary digits for each character
  assign the new bits to the end of the dictionary value
  repeat until length of old dictionary is 1
  """

    while len(thecharacters) > 1:
        smallest2 = sorted(thecharacters.items(), key=itemgetter(1))[:2]

        del thecharacters[smallest2[0][0]]
        del thecharacters[smallest2[1][0]]
        thecharacters[smallest2[0][0] + '㋡' + smallest2[1][0]] = int(
            smallest2[0][1]) + int(smallest2[1][1])

        for i in smallest2[0][0].split('㋡'):
            tree[i] += '0'
        for i in smallest2[1][0].split('㋡'):
            tree[i] += '1'
    """
  When tree is created, need to flip the binary values back to front to give the actual values
  """
    for i in tree:
        tree[i] = tree[i][::-1]

    #write json file with the huffman tree
    json.dump(tree, open(treefile, 'w'))
    """
  read the file
  compare the character to the dictionary
  find the binary
  write it to file
  """
    binstring = ''
    file = open(compressedfile, "wb")
    with open(originalfile, encoding='utf8') as f:
        #loops until the file has ended
        while True:
            c = f.read(1)
            #if character appears in the dictionary
            if c in tree:
                #writes the value to the file
                binstring += tree[c]
            if not c:
                a = BitArray(bin=binstring)
                a.tofile(file)
                file.close()
                f.close()
                break

    original = int(os.stat(originalfile).st_size)
    final = int(os.stat(compressedfile).st_size)
    treesize = int(os.stat(treefile).st_size)
    print(originalfile)
    print("Original file size: ", os.stat(originalfile).st_size, "bits")
    print("Compresd file size: ", os.stat(compressedfile).st_size, "bits")
    print("The tree file size: ", os.stat(treefile).st_size, "bits")
    print("The totl file size: ", final + treesize, "bits")
    print((((final + treesize) - original) / original) * 100, "% change")
Example #26
for i in Lis:
    file.write(i[0])
    file.write(i[1])
    file.write(" ")
file.close()
file = open(r"C:\Users\akash\OneDrive\Desktop\Images\compressed.txt", "wb")
#Location of the compressed file
#dec=file.read()
temp_string = filecon
check_st = ""
for i in Lis:
    if (i[0] == "0"):
        temp_string = temp_string.replace(i[0], "^$^")
        check_st = i[1]
for i in Lis:
    if (i[0] == "1"):
        temp_string = temp_string.replace(i[0], i[1])
temp_string = temp_string.replace("^$^", check_st)
print(temp_string)
for i in Lis:
    if (i[0] != "0" and i[0] != "1"):
        temp_string = temp_string.replace(i[0], i[1])
print(temp_string)
temp_string = "0b" + temp_string  #Addition of "0b" is required by BitString to convert it
print("length", len(temp_string))
b = BitArray(temp_string)
b.tofile(file)
file.close()
length_tempst = len(temp_string) - 2
#This variable will be passed to the Huffmantextdecom.py file
Example #27
    if fill_perc == 50:
        registry = BitArray(os.urandom(size // 8))
        filled = registry.count(1)
        if filled > fill_count:
            registry.invert()
            filled = size - filled
    else:
        filled = 0
        registry = BitArray(length=size)

    for _ in range(index_bits):
        registry.set(1, random.choices(range(0, size), k=(inv_count - filled)))
        filled = registry.count(1)
        if inv_count - filled < 10:
            break

    while filled < inv_count:
        pos = random.randrange(size)
        if not registry[pos]:
            registry.set(1, pos)
            filled += 1

    if fill_perc > 50:
        registry.invert()

    filename = f"data/{index_bits}bits_{fill_perc}pc_random.gz"
    with gzip.open(filename, "wb") as fp:
        registry.tofile(fp)
    print(filename)
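A read-back sketch (file name made up, following the naming pattern used above) to verify the fill ratio of a generated map:

import gzip
from bitstring import BitArray

with gzip.open("data/20bits_50pc_random.gz", "rb") as fp:
    registry = BitArray(bytes=fp.read())
print(registry.count(1) / len(registry))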
Example #28
def calc():
    bStartTolerance = breakbetween + (breakbetween * bTolerance)
    sStartTolerance = breakbetween - (breakbetween * sTolerance)
    bBigBreakTolerance = longBreak + (longBreak * bTolerance)
    sBigBreakTolerance = longBreak - (longBreak * bTolerance)
    bSmallBreakTolerance = (longBreak * factor) + (longBreak * factor *
                                                   bTolerance)
    sSmallBreakTolerance = (longBreak * factor) - (longBreak * factor *
                                                   sTolerance)
    #bSyncTolerance = 0.01+(0.1*bTolerance)
    #sSyncTolerance = 0.01-(0.1*sTolerance)
    global codedata
    global result
    write = []
    index = 0
    partialError = 0
    dataIndex = 0
    dif = []

    while 1:
        mutex.acquire()

        while True:
            if len(data) > len(codedata):
                codedata.append([])
            else:
                break

        while True:
            if len(data) > len(write):
                write.append(False)
            else:
                break

        # start a new index circulation
        if dataIndex == len(data) - 1:
            dataIndex = -1

        #
        if dataIndex < len(data) - 1:
            dataIndex += 1

        if len(data) == 0:
            mutex.release()
            time.sleep(threadBreak)
            continue

        if len(data[dataIndex]) != 0:
            while len(data[dataIndex]) > 2:  # enough to compare?
                d1 = datetime.strptime(data[dataIndex][0], "%H:%M:%S.%f")
                d2 = datetime.strptime(data[dataIndex][1], "%H:%M:%S.%f")
                d1 = d2 - d1  # calculate the time between packets
                dif.append(float(d1.total_seconds()))
                data[dataIndex].pop(0)

        for f1 in dif:
            if sStartTolerance < f1 < bStartTolerance:  # searching the file start/end
                index = 0
                print(str(f1) + "  \t=> Start of File")
                if codedata[dataIndex] != []:
                    hashFromServer = codedata[dataIndex][
                        -8:]  # get the hash from the end of the data
                    del codedata[dataIndex][-8:]  # remove hash from data
                    hashFromServer = int(
                        ''.join(str(e) for e in hashFromServer), 2)
                    print("Hash from server: " + str(hashFromServer))
                    dataString = ''.join(
                        str(e) for e in
                        codedata[dataIndex])  # data from List to String
                    hashFromClient = hash8(
                        codedata[dataIndex],
                        table)[0]  # generating an 8 bit Pearson hash
                    print("Hash from client: " + str(hashFromClient))

                    if hashFromServer != hashFromClient:
                        print(
                            "Mistake in data transfer... Hashes are not the same!"
                        )

                    print("")
                    print("Data: " + dataString)
                    print("Data Length: " + str(len(dataString)))
                    print("")

                    b = BitArray(bin=dataString
                                 )  # build a BitArray directly from the bit string (no character encoding)
                    if hashFromClient == hashFromServer:  # successfully transfered
                        f = open('./' + filename, 'wb')  # open file
                        b.tofile(f)  # write to file
                        f.flush()
                        f.close()
                        return

                codedata[dataIndex] = []

                if write[
                        dataIndex] == False:  # False at the beginning, as long as the file hasn't started
                    write[dataIndex] = True
            else:
                if write[dataIndex] == True:
                    if sBigBreakTolerance < f1 < bBigBreakTolerance:  # time range for a 1
                        codedata[dataIndex].append("1")
                        print(
                            str(index) + "\t" + str(f1) + "  \t=> 1 "
                        )  # print result and distance to the range borders
                    else:
                        if sSmallBreakTolerance < f1 < bSmallBreakTolerance:  # time range for 0
                            codedata[dataIndex].append("0")
                            print(str(index) + "\t" + str(f1) + "  \t=> 0")
                        else:
                            partialError += 1
                            print(
                                str(index) + "\t" + str(f1) +
                                "  \t=> undefind: will be ignored")
                    index += 1
        dif = []
        mutex.release()
        time.sleep(threadBreak)
Example #29
def calc():

    bStartTolerance = breakbetween + (breakbetween * bTolerance)
    sStartTolerance = breakbetween - (breakbetween * sTolerance)
    bBigBreakTolerance = longBreak + (longBreak * bTolerance)
    sBigBreakTolerance = longBreak - (longBreak * bTolerance)
    bSmallBreakTolerance = (longBreak * factor) + (longBreak * factor *
                                                   bTolerance)
    sSmallBreakTolerance = (longBreak * factor) - (longBreak * factor *
                                                   sTolerance)
    #bSyncTolerance = 0.01+(0.1*bTolerance)
    #sSyncTolerance = 0.01-(0.1*sTolerance)

    global codedata
    global result
    write = False
    index = 0
    totalError = 0
    partialError = 0
    totalData = 0
    count = 0
    correctTransfert = 0
    startTime = []
    firstCorrectReceveTime = []
    passFirstTime = 0

    while 1:
        mutex.acquire()
        dif = list()
        while len(data) > 2:  # enough to compare?
            d1 = datetime.strptime(data[0], "%H:%M:%S.%f")
            d2 = datetime.strptime(data[1], "%H:%M:%S.%f")
            d1 = d2 - d1  # calculate the time between packets
            dif.append(float(d1.total_seconds()))
            data.pop(0)

        for f1 in dif:
            if sStartTolerance < f1 < bStartTolerance:  # searching the file start/end
                index = 0
                print(str(f1) + "  \t=> Start of File")
                if codedata != []:
                    hashFromServer = codedata[
                        -8:]  # get the hash from the end of the data
                    del codedata[-8:]  # remove hash from data
                    hashFromServer = int(
                        ''.join(str(e) for e in hashFromServer), 2)
                    print("Hash from server: " + str(hashFromServer))
                    dataString = ''.join(
                        str(e) for e in codedata)  # data from List to String
                    hashFromClient = hash8(
                        codedata, table)[0]  # generating an 8 bit Pearson hash
                    print("Hash from client: " + str(hashFromClient))

                    if hashFromServer != hashFromClient:
                        print(
                            "Mistake in data transfer... Hashes are not the same!"
                        )

                    print("")
                    print("Data: " + dataString)
                    print("Data Length: " + str(len(dataString)))
                    print("")

                    totalData += len(dataString)
                    totalError += partialError
                    partialError = 0

                    print("Fehlerrate gesamt: " +
                          str((totalError / totalData) * 100) + "%")

                    count += 1

                    b = BitArray(bin=dataString
                                 )  # build a BitArray directly from the bit string (no character encoding)
                    if hashFromClient == hashFromServer:
                        f = open('./' + filename, 'wb')  # open file
                        b.tofile(f)  # write to file
                        f.flush()
                        f.close()

                        if correctTransfert == 0:
                            firstCorrectReceveTime = time.time()
                        correctTransfert += 1
                        print("Korrekt Übertragen: " + str(correctTransfert))
                        if startTime != [] and firstCorrectReceveTime != []:
                            print("Zeit bis zum ersten koreketn Paket" +
                                  (times + " " +
                                   str(firstCorrectReceveTime - startTime)))
                        return

                    if count == 20:
                        print("Korrekt Übertragen: " + str(correctTransfert))
                        if startTime != [] and firstCorrectReceveTime != []:
                            print("Zeit bis zum ersten koreketn Paket" +
                                  str(startTime - firstCorrectReceveTime))
                        #return

                codedata = []
                if write == False:  # False at the beginning, as long as the file hasn't started
                    startTime = time.time()
                    write = True
            else:
                if write == True:
                    if sBigBreakTolerance < f1 < bBigBreakTolerance:  # time range for a 1
                        if index <= 10:
                            print(
                                str(index) + "\t" + str(f1) +
                                "  \t estimate sync")
                        else:
                            codedata.append("1")
                            print(
                                str(index) + "\t" + str(f1) + "  \t=> 1 "
                            )  # print result and distance to the range borders
                    else:
                        if sSmallBreakTolerance < f1 < bSmallBreakTolerance:  # time range for 0
                            if index <= 10:
                                print(
                                    str(index) + "\t" + str(f1) +
                                    "  \t=> estimate sync")
                            else:
                                codedata.append("0")
                                print(str(index) + "\t" + str(f1) + "  \t=> 0")
                        else:
                            if index < 16:
                                print(
                                    str(index) + "\t" + str(f1) +
                                    "  \t=> sync")
                            else:
                                partialError += 1
                                print(
                                    str(index) + "\t" + str(f1) +
                                    "  \t=> undefind: will be ignored")
                    index += 1

        mutex.release()
        time.sleep(threadBreak)
Example #30
from bitstring import BitArray
from tkinter.filedialog import askopenfilename

fn = askopenfilename(filetypes=(("chicken File", "*.ch"), ("All Files",
                                                           "*.*")),
                     title="Select a chicken file")
f = open(fn, 'r')
in_string = f.read()
in_string = ((in_string.replace('chicken',
                                '1')).replace(' ', '')).replace('\n', '0')
bit_array = BitArray(bin=in_string)
#print(bit_array.bin)
out_file = open(fn[:-2] + 'cbit', 'wb')
bit_array.tofile(out_file)
out_file.close()
Example #31
def compress(file):
    print("Compressing...")
    print("")
    # Opens and reads the file, with UTF-8 encoding
    with codecs.open(file, 'r', encoding='utf8') as f:
        text = f.read()
    # Iterates through the characters in the file, adding each unique character to letter_frequency and letters arrays
    # A count is kept for the number of times each character is used in the file and is added to the letter_frequency array (alongside the character)
    letters = []
    letter_frequency = []
    for letter in text:
        if letter not in letter_frequency:
            frequency = text.count(letter)
            letter_frequency.append(frequency)
            letter_frequency.append(letter)
            letters.append(letter)
    # Creates the initial nodes for the Huffman Tree
    nodes = []
    while len(letter_frequency) > 0:
        nodes.append(letter_frequency[0:2])
        letter_frequency = letter_frequency[2:]
    nodes.sort()
    tree = []
    tree.append(nodes)
    # Iterates through characters, allocating each one a 1 or a 0, based on whether the character is present in the preceding node
    # Also creates new nodes if there is more than one character associated with the preceding node
    while len(nodes) > 1:
        x = 0
        new_node = []
        nodes.sort()
        nodes[x].append("0")
        nodes[x + 1].append("1")
        first_node = (nodes[x][0] + nodes[x + 1][0])
        second_node = (nodes[x][1] + nodes[x + 1][1])
        new_node.append(first_node)
        new_node.append(second_node)
        new_nodes = []
        new_nodes.append(new_node)
        new_nodes = new_nodes + nodes[2:]
        nodes = new_nodes
        tree.append(nodes)
    tree.sort(reverse=True)
    # Removes all duplicate items in the Huffman Tree
    # (iterate over a copy of each level so that removing items does not skip elements)
    unique_nodes = []
    for level in tree:
        for node in list(level):
            if node not in unique_nodes:
                unique_nodes.append(node)
            else:
                level.remove(node)
    # Builds the unique binary code for each character based on its path in the Huffman Tree
    if len(letters) == 1:
        # single-character file: the only character simply gets the code "0"
        letter_code = [letters[0], "0"]
        letter_binary.append(letter_code)
    else:
        for letter in letters:
            lettercode = ""
            for node in unique_nodes:
                if len(node) > 2 and letter in node[1]:
                    lettercode = lettercode + node[2]
            letter_code = [letter, lettercode]
            letter_binary.append(letter_code)
    # Creates a new array containing only the character and its binary code for each entry in the Huffman Tree
    tree_levels = []
    tree_level = []
    for letter in letter_binary:
        tree_level.append(letter[0])
        tree_level.append(letter[1])
        tree_levels.append(tree_level)
        tree_level = []
    # Sorts and prints the Huffman Tree using the Sort() function
    print("Huffman Tree of File:")
    print(Sort(tree_levels))
    print("")
    # Creates bitstring of the text in the file, using binary codes of each character
    binary_string = ""
    for character in text:
        for item in letter_binary:
            if character in item:
                binary_string = binary_string + item[1]
    # Prints the binary representation of the file
    print("Compressed File:")
    print(binary_string)
    print("")
    # Writes the bitstring and the Huffman Tree to a bin file (compressed file)
    a = BitArray(bin=binary_string)
    with open('output_file.bin', 'wb') as f:
        a.tofile(f)
        pk.dump(tree, f)
    # Calculates the size of the original file, the compressed file and the size reduction
    uncompressed_file_size = Path(file).stat().st_size
    compressed_file_size_bytes = Path('output_file.bin').stat().st_size
    compressed_file_size = len(binary_string)
    size.append(compressed_file_size)
    print("Original file size: ", uncompressed_file_size, " bytes")
    print("Compressed file size: ", compressed_file_size_bytes, " bytes")
    print("This is a reduction of ",
          (1 - (compressed_file_size_bytes / uncompressed_file_size)) * 100,
          "%")
Example #32
0
def compression(target_file):
    """This is the compression section"""

    start_dict = float(time.process_time())  # measure time from here
    # frequency dictionary
    letter_freq = {}  # collect all characters in a text file

    opened_file = target_file
    filename = opened_file
    with open(opened_file, encoding="utf_8_sig") as f:
        for line in f:
            for letter in line:
                # print("'" + letter + "' found!") # test code
                try:
                    letter_freq[letter] += 1  # character already seen: bump its count
                except KeyError:
                    letter_freq[letter] = 1  # first occurrence: add it to the dict

    # print("letter_freq", letter_freq)
    # Class for creating node objects in a tree
    class Node(object):
        """This is the class that creates an object that holds 2 daughter objects"""

        # constructor
        def __init__(self, left_node=None, right_node=None):
            """
            In a binary tree, each node can only have up to 2 daughters.
            The variable 'left' and 'right' stores the daughter nodes when initialised.
            The node stored in the daughters can either be a character (leaf) or another node object.
            """
            self.left_node = left_node  # left-hand nodes
            self.right_node = right_node  # right-hand node

        # getter method for left and right item
        def daughters(self):
            """This is a method that returns the contents of both nodes stored from their parent node"""
            return self.left_node, self.right_node
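
    # Illustration (assumed, not executed in the original flow):
    #   pair = Node('a', Node('b', 'c'))
    #   pair.daughters()   # -> ('a', <Node holding 'b' and 'c'>)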

    # Build an ascending list of (node, weight) tuples, where a node is either a
    # character (leaf) or a Node object, sorted by the second element (the weight).
    # Every item in the letter-frequency dictionary 'letter_freq' becomes such a tuple.
    list_nodes_objects = sorted(letter_freq.items(),
                                key=lambda x: x[1],
                                reverse=False)
    # print(list_nodes_objects)  # test code

    # this while loop iterates
    while len(list_nodes_objects) > 1:
        """
        This while loop iterates the list of node tuple until one item is left.
        The goal of this loop is to generate 1 node, which contains all the combined nodes
        """
        # take the 2 least weight nodes
        (character_1,
         freq_1) = list_nodes_objects[0]  # tuple (character, freq)
        (character_2, freq_2) = list_nodes_objects[1]
        list_nodes_objects = list_nodes_objects[
            2:]  # list of nodes updated after 2 nodes are taken out

        # print("list_nodes_objects ", list_nodes_objects)  # test code

        new_node = Node(
            character_1, character_2
        )  # new node, contains the 2 combined smallest node in an object
        list_nodes_objects.append(
            (new_node, freq_1 + freq_2)
        )  # put the combined node back to the array of node with new weight
        # (combined node, sum of weight)

        # sort the array in order after a new node is append
        list_nodes_objects = sorted(list_nodes_objects,
                                    key=lambda x: x[1],
                                    reverse=False)

    # print("sorted list_nodes_objects ", list_nodes_objects)  # test code

    # Huffman dictionary generator
    def huffman_dictionary(node_object, binary=''):
        """
        This recursive function returns the huffman code mapping (hash table) for each characters in dictionary.
        The parameter 'node' can either be a node object or a character. e.g. (node, weight)
        The parameter 'bitString' stores the binary information of a character in every recursion.
        """
        # if a leaf (a string, not an object) is reached:
        if type(node_object) is str:
            return {
                node_object: binary
            }  # insert a new mapping to the dictionary

        # create a tuple, extract items (left_item, right_item) from a nodes' daughter
        (left_item, right_item) = node_object.daughters()

        encoding_dict = {}  # dictionary contains all the encoding
        # print("1 ", encoding_dict)  # test code

        # call itself recursively until all dictionary is added, depth first search algorithm
        encoding_dict.update(huffman_dictionary(left_item, binary + '0'))
        encoding_dict.update(huffman_dictionary(right_item, binary + '1'))
        # print("2 ", encoding_dict)  # test code
        return encoding_dict  # output the dictionary

    # trigger the function 'huffman_dictionary' to generate the huffman dictionary
    # by passing the [first object] in the [first tuple]
    # in the [list of tuple node object] created in the loop.
    huffman_encoding = huffman_dictionary(list_nodes_objects[0][0])
    # print(huffman_encoding)  # test code
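    # Worked example (assumed, not part of the original): for a file containing
    # just "aab", letter_freq is {'a': 2, 'b': 1}, the while loop leaves the single
    # node Node('b', 'a'), and huffman_dictionary() returns {'b': '0', 'a': '1'}.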

    end_dict = float(
        time.process_time()
    )  # end measure time in this line to check processing time for this section
    print(
        str(end_dict - start_dict) +
        " seconds taken to create a unique huffman hash table!")

    # add letter frequency to the compressed file

    # convert a dictionary object into string
    string_dict = json.dumps(letter_freq)
    # print("string dict: ", string_dict)  # test code

    # convert dictionary to ascii binary
    byte_array = string_dict.encode()

    binary_int = int.from_bytes(byte_array, "big")
    binary_string = bin(binary_int)

    dict_binary = binary_string[2:]
    # print("dict bin: ", dict_binary)  # test code

    # add a binary identifier (a catchphrase) for the dictionary section;
    # this separates the hash table from the huffman code that follows
    byte_array1 = "catchphrase".encode()

    binary_int1 = int.from_bytes(byte_array1, "big")
    binary_catchphrase = bin(binary_int1)[2:]
    # print("catch bin: ", binary_catchphrase)  # test code
    # print("catch bin len: ",len(binary_catchphrase))  # test code: 87
    # ------

    huffman_binary = ''
    # re-read the source file and encode every character with the huffman dictionary
    with open(filename, encoding="utf_8_sig") as f:
        for line in f:
            for letter in line:
                # print(huffman_encoding[letter])  # test code
                huffman_binary += huffman_encoding[letter]
                # print(letter, huffman_encoding[letter])  # test code

    # print("huff: ",huffman_binary) # test code

    # compress file with hash table into binary
    encodingString = ""  # the encoded content
    # insert decoding hash table and the catchphrase

    encodingString += dict_binary + binary_catchphrase + huffman_binary
    # encodingString += dict_binary + binary_catchphrase + huffman_binary  # test code
    # print("binary: ", encodingString)  # test code

    # extract original file name
    file_name = filename[:-4]  # assuming the file to be compressed is .txt
    # print(file_name)  # test code

    # write 'encodingString' into a binary file
    compressed_file = BitArray(bin=encodingString)
    open_file = file_name + '.bin'
    with open(open_file, 'wb') as f:
        compressed_file.tofile(f)

    print("Compression completed!")
    end_compress = float(
        time.process_time()
    )  # end measure time in this line to check processing time for this section
    print(str(end_compress - end_dict) + " seconds taken to compress the file!")
    main()
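
# A minimal usage sketch for compression() above (assumed, not part of the
# original script): it supplies the imports the function relies on and a
# placeholder main(), since compression() calls main() when it finishes.
import json
import time
from bitstring import BitArray

def main():
    pass  # hypothetical stand-in for the original script's menu/entry point

# compression("sample.txt")  # writes sample.bin next to sample.txt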