Ejemplo n.º 1
0
def compressData(fileName):
    """Load ``fileName``, build a pairwise edge list and print the ``Dmst.dmst`` result.

    For every record index ``i`` an edge from node 0 to node ``i + 1`` is
    created whose weight is ``getReduceVal`` of that record; for every
    ordered pair ``i != j`` an edge from node ``i + 1`` to node ``j + 1`` is
    created whose weight is taken from the cost matrix returned by
    ``getmatrix``.  The edge list is handed to ``Dmst.dmst`` with node 0 as
    the root argument, and the returned total and reference map are printed.

    Args:
        fileName: Path passed unchanged to ``loadData``.

    Returns:
        None.  All results are printed to standard output.
    """
    num, Data = loadData(fileName)
    print(Data[0:2])
    print(num)

    # Baseline figure used as the denominator of the printed ratio.
    raw_size = len(Data[0]) * 3 * len(Data)
    print("Total memory " + str(raw_size))

    cost_matrix = getmatrix(Data, num)  # pairwise cost matrix
    self_costs = [getReduceVal(record) for record in Data]

    # Node 0 is the extra root; records occupy nodes 1..num.
    edge_list = [
        (0, dst + 1, self_costs[dst], 0, dst + 1)
        if src == dst
        else (src + 1, dst + 1, cost_matrix[src][dst], src + 1, dst + 1)
        for src in range(num)
        for dst in range(num)
    ]

    packed_size, reference_map = Dmst.dmst(num + 1, edge_list, 0)
    print(packed_size)
    print(packed_size / raw_size)
    for link in reference_map:
        print(link[0], " -> ", link[1])
Ejemplo n.º 2
0
def compressDataExpectation(fileName, window, overlap):
    """Compress the records of ``fileName`` into ``<fileName>.mstcom/``.

    Steps, as performed by the code below:

    1. Load the records with ``loadData`` and set the module globals
       ``BITSIID`` and ``BITSINNUM`` from the record count and the length of
       the first record.
    2. Build an edge list: node 0 -> node ``i + 1`` weighted by
       ``getReduceVal(Data[i])``, and node ``i + 1`` -> node ``j + 1`` for
       every pair ``(i, j)`` present in the weights returned by
       ``getmatrix``.
    3. Run ``Dmst.dmst`` on that edge list with node 0 as root argument.
    4. For every returned pair whose first element is 0, write the record
       verbatim to ``ref.txt``; for every other pair call
       ``compression2.check_mates`` with the metadata file.
    5. bz2-compress both files in place and print the combined size.

    Args:
        fileName: Path of the input file; also the prefix of the output folder.
        window: Forwarded to ``getmatrix``.
        overlap: Forwarded to ``getmatrix``.

    Returns:
        None.
    """
    global BITSINNUM
    global BITSIID
    import bz2
    import os

    num, Data, Name = loadData(fileName)
    BITSIID = int(math.log10(num)) * 4
    BITSINNUM = int(math.log10(len(Data[0]))) * 4

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)

    own = [getReduceVal(d) for d in Data]

    # ``mates`` is indexed by (i, j) tuples, like ``weights1``.
    weights1, mates = getmatrix(Data, num, window, overlap, order3)
    del order3[:]  # release the expectations as soon as they are consumed

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # Edge from the extra root node 0 to record i.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif (i, j) in weights1:
                Edges.append((i + 1, j + 1, weights1[i, j], i + 1, j + 1))
    _, refmap2 = Dmst.dmst(num + 1, Edges, 0)
    del own[:]
    del weights1

    folder = fileName + '.mstcom/'
    os.makedirs(os.path.dirname(folder), exist_ok=True)
    ref_path = folder + "ref.txt"
    meta_path = folder + "metadata.txt"

    # Context managers guarantee both files are closed even if a helper raises.
    with open(ref_path, 'w') as ref_file, open(meta_path, 'w') as meta_file:
        for edge in refmap2:
            parent, child = edge[0], edge[1]
            if parent == 0:
                # Pairs rooted at node 0 are stored verbatim.
                ref_file.write(
                    str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                compression2.check_mates(
                    mates[parent - 1, child - 1], Data[parent - 1],
                    Data[child - 1], meta_file, parent - 1, child - 1)

        # Trailer line: record count followed by the names, '|'-separated.
        ref_file.write("|".join([str(num), *Name]) + "\n")
        meta_file.write(compression2.final_final_string)

    compressionLevel = 9
    # Read both plain-text files fully before either one is overwritten.
    with open(ref_path, 'rb') as plain:
        tarbz2contents1 = bz2.compress(plain.read(), compressionLevel)
    with open(meta_path, 'rb') as plain:
        tarbz2contents2 = bz2.compress(plain.read(), compressionLevel)

    with open(ref_path, 'wb') as packed:
        packed.write(tarbz2contents1)
    with open(meta_path, 'wb') as packed:
        packed.write(tarbz2contents2)

    # len() is the number of bytes actually written; sys.getsizeof would add
    # the interpreter's per-object overhead to each figure.
    size3_72 = len(tarbz2contents1) + len(tarbz2contents2)
    print('compressed size ' + str(size3_72))
Ejemplo n.º 3
0
def compressDataExpectation(fileName, window, opverlap, output_file2,
                            output_file3):
    """Compress ``fileName`` into ``<fileName>.mstcom`` and log size and time.

    The records are loaded, an edge list is built from ``getReduceVal`` and
    the weights returned by ``getmatrix``, and ``Dmst.dmst`` is run with node
    0 as root argument.  Pairs rooted at node 0 are written verbatim to
    ``ref.txt``; all other pairs are passed to ``compression2.check_mates``
    together with ``metadata.txt``.  Both files are then bz2-compressed in
    place and the ``compression2`` module buffers are reset.

    Args:
        fileName: Path of the input file; also the prefix of the output folder.
        window: Forwarded to ``getmatrix``.
        opverlap: Forwarded to ``getmatrix`` as its overlap argument.  (The
            spelling is kept because it is part of the signature.)
        output_file2: Writable text stream; receives
            ``"<fileName>    <compressed bytes>\\n"``.
        output_file3: Writable text stream; receives
            ``"<fileName>    <elapsed seconds>\\n"``.

    Returns:
        None.
    """
    global BITSINNUM
    global BITSIID
    import bz2
    import os

    start = time.time()
    num, Data, Name = loadData(fileName)
    BITSIID = int(math.log10(num)) * 4
    BITSINNUM = int(math.log10(len(Data[0]))) * 4

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)

    own = [getReduceVal(d) for d in Data]

    # Use the parameter itself: the body previously read an undefined
    # ``overlap`` name instead of ``opverlap``.
    weights1, mates = getmatrix(Data, num, window, opverlap, order3)
    del order3[:]  # release the expectations as soon as they are consumed

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # Edge from the extra root node 0 to record i.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif (i, j) in weights1:
                Edges.append((i + 1, j + 1, weights1[i, j], i + 1, j + 1))
    _, refmap2 = Dmst.dmst(num + 1, Edges, 0)
    del own[:]
    del weights1

    # os.path.join picks the platform separator; on Windows this yields the
    # same '<fileName>.mstcom\\ref.txt' path as the former hard-coded '\\'.
    folder = fileName + '.mstcom'
    os.makedirs(folder, exist_ok=True)
    ref_path = os.path.join(folder, "ref.txt")
    meta_path = os.path.join(folder, "metadata.txt")

    total_ref = 0
    total_non_ref = 0
    # Context managers guarantee both files are closed even if a helper raises.
    with open(ref_path, 'w') as ref_file, open(meta_path, 'w') as meta_file:
        for edge in refmap2:
            parent, child = edge[0], edge[1]
            if parent == 0:
                # Pairs rooted at node 0 are stored verbatim.
                total_ref += 1
                ref_file.write(
                    str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                total_non_ref += 1
                compression2.check_mates(
                    mates[parent - 1, child - 1], Data[parent - 1],
                    Data[child - 1], meta_file, parent - 1, child - 1)

        # Trailer line: record count followed by the names, '|'-separated.
        ref_file.write("|".join([str(num), *Name]) + "\n")
        meta_file.write(compression2.final_final_string)

    print("ref", total_ref, "non", total_non_ref)

    compressionLevel = 9
    # Read both plain-text files fully before either one is overwritten.
    with open(ref_path, 'rb') as plain:
        tarbz2contents1 = bz2.compress(plain.read(), compressionLevel)
    with open(meta_path, 'rb') as plain:
        tarbz2contents2 = bz2.compress(plain.read(), compressionLevel)
    end = time.time()  # timing covers everything up to and including bz2

    # len() is the number of bytes actually written; sys.getsizeof would add
    # the interpreter's per-object overhead to each figure.
    size3_72 = len(tarbz2contents1) + len(tarbz2contents2)
    output_file2.write(fileName + '    ' + str(size3_72) + '\n')
    output_file2.flush()

    output_file3.write(fileName + '    ' + str(end - start) + '\n')

    with open(ref_path, 'wb') as packed:
        packed.write(tarbz2contents1)
    with open(meta_path, 'wb') as packed:
        packed.write(tarbz2contents2)

    # Reset the shared compression2 buffers so the next call starts clean.
    compression2.MAP_FOR_SYMBOL2 = {}
    compression2.final_final_string = ''
    compression2.final_bit = bytearray()
    compression2.final_bit2 = bytearray()
    compression2.final_final_string2 = ''
Ejemplo n.º 4
0
def compressDataExpectation(fileName):
    """Compress ``fileName`` into ``<fileName>.mstcom/`` and log the zipped size.

    Steps, as performed by the code below:

    1. Load the records and a 26-bit character-presence mask with
       ``loadData``; derive the module globals ``BITSINNUM``, ``BITSIID``,
       ``PER_CHARACTER_BIT`` and ``PER_CHARACTER_BIT2``.
    2. Fill ``MAP_FOR_SYMBOL2`` with fixed-width binary codes for every
       letter flagged in the mask, then for ``'-'``, ``'#'``, ``'$'`` and
       ``'?'``, and publish it on ``compression2``.
    3. Build the edge list (node 0 -> record weighted by ``getReduceVal``;
       record -> record weighted by ``getmatrix`` unless the weight is -1)
       and run ``Dmst.dmst`` with node 0 as root argument.
    4. Write pairs rooted at node 0 verbatim to ``sifatscompression.txt``;
       pass every other pair to ``compression2.check_mates`` together with
       the ``fial`` file.
    5. Zip each output file, write the combined archive size to the
       module-level ``output_file2`` stream, and reset the module and
       ``compression2`` state.

    Args:
        fileName: Path of the input file; also the prefix of the output folder.

    Returns:
        None.

    Note:
        ``output_file2`` is not a parameter; it is looked up as a global and
        must already be an open writable stream.
    """
    global BITSIID
    global BITSINNUM
    global MAP_FOR_SYMBOL2
    global PER_CHARACTER_BIT
    global PER_CHARACTER_BIT2
    import os

    num, Data, different_characters, Name = loadData(fileName)
    different_characters = format(different_characters, '026b')
    BITSINNUM = int(math.log2(len(Data[0]))) + 1
    BITSIID = int(math.log2(num)) + 1
    # Reverse the mask so that position 0 corresponds to 'a'.
    different_characters = different_characters[::-1]

    # Four extra symbols ('-', '#', '$', '?') plus one per flagged letter.
    DF = 4 + different_characters.count('1')
    PER_CHARACTER_BIT = math.ceil(math.log2(DF))
    PER_CHARACTER_BIT2 = math.ceil(math.log2(DF))
    code_format = '0' + str(PER_CHARACTER_BIT) + 'b'

    next_code = 0
    for offset, flag in enumerate(different_characters):
        if flag == '1':
            MAP_FOR_SYMBOL2[chr(ord('a') + offset)] = format(
                next_code, code_format)
            next_code += 1
    # The special symbols take the codes immediately after the letters.
    for symbol in ('-', '#', '$', '?'):
        bits = format(next_code, code_format)
        MAP_FOR_SYMBOL2[symbol] = bits
        print(symbol, bits)
        next_code += 1

    compression2.MAP_FOR_SYMBOL2 = MAP_FOR_SYMBOL2
    print("GENE NUMBER ", num)

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)

    own = [getReduceVal(d) for d in Data]

    # ``mates`` is indexed [i][j], like ``weights1``.
    weights1, mates = getmatrix(Data, num, 5, 3, order3)

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # Edge from the extra root node 0 to record i.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif weights1[i][j] != -1:  # -1 marks a pair with no edge
                Edges.append((i + 1, j + 1, weights1[i][j], i + 1, j + 1))
    solution, refmap2 = Dmst.dmst(num + 1, Edges, 0)

    folder = fileName + '.mstcom/'
    os.makedirs(os.path.dirname(folder), exist_ok=True)
    ref_path = folder + "sifatscompression.txt"
    meta_path = folder + "fial"

    # Half of each bit width, rounded up.
    datar_index = (BITSINNUM + 1) // 2
    koy_number_data_index = (BITSIID + 1) // 2

    check = 0
    total_ref = 0
    total_non_ref = 0
    # Mode 'w' already truncates; context managers close both files even if
    # a helper raises.
    with open(ref_path, 'w') as ref_file, open(meta_path, 'w') as meta_file:
        # Header: presence mask followed by the two widths as 32-bit fields.
        compression2.final_final_string += different_characters
        compression2.final_final_string += format(datar_index, '032b')
        compression2.final_final_string += format(
            koy_number_data_index, '032b')

        for edge in refmap2:
            parent, child = edge[0], edge[1]
            if parent == 0:
                check += own[child - 1]
                total_ref += 1
                ref_file.write(
                    str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                check += weights1[parent - 1][child - 1]
                total_non_ref += 1
                compression2.check_mates(
                    mates[parent - 1][child - 1], Data[parent - 1],
                    Data[child - 1], datar_index, koy_number_data_index,
                    meta_file, parent - 1, child - 1)

        # Trailer line: record count followed by the names, '|'-separated.
        ref_file.write("|".join([str(num), *Name]) + "\n")
        compression2.write_in_binary_in_file(
            num, koy_number_data_index, ref_file)
        meta_file.write(compression2.final_final_string)

    print("ref", total_ref, "non", total_non_ref)

    # Sanity check: the selected edge weights must add up to the reported total.
    if check == solution:
        print("now ok ")
    else:
        print("still problem", solution, check)

    size1 = 0
    for plain_path, suffix in ((ref_path, "sifatmast.zip"),
                               (meta_path, "fialmast.zip")):
        archive_path = plain_path + suffix
        with zipfile.ZipFile(archive_path, "w",
                             zipfile.ZIP_DEFLATED) as archive:
            archive.write(plain_path)
        size1 += os.path.getsize(archive_path)

    # Rebuild the label from the output path: drop the file-name component,
    # then strip the trailing 8 characters ('.mstcom/').
    name = ''.join(
        part + '/'
        for part in ref_path.split('/')
        if part != 'sifatscompression.txt')[:-8]
    output_file2.write(name + '    ' + str(size1) + '\n')
    output_file2.flush()

    # Restore module defaults so the next call starts from a clean state.
    BITSINNUM = 12
    BITSIID = 12
    PER_CHARACTER_BIT = 0
    PER_CHARACTER_BIT2 = 0
    MAP_FOR_SYMBOL2 = {}

    compression2.MAP_FOR_SYMBOL2 = {}
    compression2.final_final_string = ''
    compression2.final_bit = bytearray()
    compression2.final_bit2 = bytearray()
    compression2.final_final_string2 = ''
Ejemplo n.º 5
0
def compressDataExpectation(fileName):
    """Compress ``fileName`` into two files in the current working directory.

    Steps, as performed by the code below:

    1. Load the records and a 26-bit character-presence mask with
       ``loadData``; derive the module globals ``BITSINNUM``, ``BITSIID``,
       ``PER_CHARACTER_BIT`` and ``PER_CHARACTER_BIT2``.
    2. Add fixed-width binary codes to the module-level ``MAP_FOR_SYMBOL2``
       for every letter flagged in the mask, then for ``'-'``, ``'#'`` and
       ``'$'``, and publish the mapping on ``compression2``.
    3. Build the edge list (node 0 -> record weighted by ``getReduceVal``;
       record -> record weighted by ``getmatrix`` unless the weight is -1)
       and run ``Dmst.dmst`` with node 0 as root argument.
    4. Write pairs rooted at node 0 verbatim to ``sifatscompression.txt``;
       pass every other pair to ``compression2.check_mates`` together with
       ``fial.txt``.

    Args:
        fileName: Path passed unchanged to ``loadData``.

    Returns:
        None.

    Note:
        Nothing is reset afterwards: this function only appends to
        ``compression2.final_final_string`` / ``final_final_string2`` and
        only adds keys to ``MAP_FOR_SYMBOL2``.
        NOTE(review): repeated calls therefore build on earlier contents
        unless the caller clears them - confirm that is intended.
    """
    global BITSIID
    global BITSINNUM
    global PER_CHARACTER_BIT
    global PER_CHARACTER_BIT2

    num, Data, different_characters, Name = loadData(fileName)
    different_characters = format(different_characters, '026b')
    print("uck", different_characters)
    # Debug print; indexing Data[1] requires at least two records.
    print(num, len(Data[1]))
    BITSINNUM = int(math.log2(len(Data[0]))) + 1
    BITSIID = int(math.log2(num)) + 1
    # Reverse the mask so that position 0 corresponds to 'a'.
    different_characters = different_characters[::-1]

    # Base of 4 plus one per flagged letter determines the code width.
    DF = 4 + different_characters.count('1')
    PER_CHARACTER_BIT = math.ceil(math.log2(DF))
    PER_CHARACTER_BIT2 = math.ceil(math.log2(DF))
    code_format = '0' + str(PER_CHARACTER_BIT) + 'b'

    next_code = 0
    for offset, flag in enumerate(different_characters):
        if flag == '1':
            MAP_FOR_SYMBOL2[chr(ord('a') + offset)] = format(
                next_code, code_format)
            next_code += 1
    # The special symbols take the codes immediately after the letters.
    for symbol in ('-', '#', '$'):
        bits = format(next_code, code_format)
        MAP_FOR_SYMBOL2[symbol] = bits
        print(symbol, bits)
        next_code += 1

    compression2.MAP_FOR_SYMBOL2 = MAP_FOR_SYMBOL2
    print("GENE NUMBER ", num)
    totla_mem = len(Data[0]) * 3 * len(Data)

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)

    own = [getReduceVal(d) for d in Data]

    # ``mates`` is indexed [i][j], like ``weights1``.
    weights1, mates = getmatrix(Data, num, 7, 3, order3)

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # Edge from the extra root node 0 to record i.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif weights1[i][j] != -1:  # -1 marks a pair with no edge
                Edges.append((i + 1, j + 1, weights1[i][j], i + 1, j + 1))
    solution, refmap2 = Dmst.dmst(num + 1, Edges, 0)
    New_mem2 = solution + BITSIID * num

    print("--->", New_mem2 / totla_mem)

    # Half of each bit width, rounded up.
    datar_index = (BITSINNUM + 1) // 2
    koy_number_data_index = (BITSIID + 1) // 2

    check = 0
    total_ref = 0
    total_non_ref = 0
    # Mode 'w' already truncates; context managers close both files even if
    # a helper raises.
    with open("sifatscompression.txt", 'w') as ref_file, \
            open("fial.txt", 'w') as meta_file:
        # Header: presence mask followed by the two widths as 32-bit fields.
        compression2.final_final_string += different_characters
        compression2.final_final_string += format(datar_index, '032b')
        compression2.final_final_string += format(
            koy_number_data_index, '032b')
        compression2.final_final_string2 += compression2.get_binary_in_file(
            num, koy_number_data_index)

        for edge in refmap2:
            parent, child = edge[0], edge[1]
            print(parent, child)
            if parent == 0:
                check += own[child - 1]
                total_ref += 1
                ref_file.write(
                    str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                check += weights1[parent - 1][child - 1]
                total_non_ref += 1
                compression2.check_mates(
                    mates[parent - 1][child - 1], Data[parent - 1],
                    Data[child - 1], datar_index, koy_number_data_index,
                    meta_file, parent - 1, child - 1)

        # Trailer line: record count followed by the names, '|'-separated.
        ref_file.write("|".join([str(num), *Name]) + "\n")
        compression2.write_in_binary_in_file(
            num, koy_number_data_index, ref_file)
        meta_file.write(compression2.final_final_string)

    print("habijabi", total_ref, "non", total_non_ref)

    # Sanity check: the selected edge weights must add up to the reported total.
    if check == solution:
        print("now ok ")
    else:
        print("still problem", solution, check)