def compressData(fileName):
    """Print a compression estimate and the reference map for ``fileName``.

    The sequences returned by ``loadData`` are turned into a dense edge list
    and handed to ``Dmst.dmst``.  Node ``0`` is an extra root: the edge
    ``0 -> i + 1`` carries ``getReduceVal(Data[i])`` and every other edge
    ``i + 1 -> j + 1`` carries ``weights[i][j]``.

    Args:
        fileName: Path passed straight to ``loadData``.

    Prints the first two sequences, the sequence count, the baseline size
    (``len(Data[0]) * 3 * len(Data)``), the value returned by ``Dmst.dmst``,
    its ratio to the baseline, and one ``a -> b`` line per entry of the
    returned reference map.
    """
    num, Data = loadData(fileName)
    print(Data[0:2])
    print(num)

    # Baseline: every sequence is assumed to be as long as the first one.
    totla_mem = len(Data[0]) * 3 * len(Data)
    print("Total memory " + str(totla_mem))

    weights = getmatrix(Data, num)  # pairwise cost matrix
    own = [getReduceVal(d) for d in Data]

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # The diagonal slot is reused for the edge from root node 0.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            else:
                Edges.append((i + 1, j + 1, weights[i][j], i + 1, j + 1))

    # NOTE(review): presumably a directed minimum spanning tree rooted at
    # node 0 -- confirm against the Dmst module.
    New_mem, refmap = Dmst.dmst(num + 1, Edges, 0)
    print(New_mem)
    print(New_mem / totla_mem)
    for entry in refmap:
        print(entry[0], " -> ", entry[1])
def compressDataExpectation(fileName, window, overlap):
    """Compress ``fileName`` into ``<fileName>.mstcom/`` and print the size.

    Steps, in order:
      1. Load the sequences and set the module globals ``BITSIID`` and
         ``BITSINNUM`` from the sequence count and the first sequence length.
      2. Build an edge list (root node ``0`` plus one node per sequence) and
         run ``Dmst.dmst`` on it.
      3. For each ``(parent, child)`` entry of the returned map, either write
         the child sequence verbatim to ``ref.txt`` (parent is ``0``) or let
         ``compression2.check_mates`` encode it against its parent.
      4. bz2-compress ``ref.txt`` and ``metadata.txt`` in place and print the
         combined compressed size in bytes.

    Args:
        fileName: Input path; also the prefix of the output folder.
        window: Forwarded to ``getmatrix``.
        overlap: Forwarded to ``getmatrix``.

    NOTE(review): later definitions in this file reuse the name
    ``compressDataExpectation``; when they are present the last one wins at
    import time.
    """
    global BITSINNUM
    global BITSIID
    import bz2
    import os

    num, Data, Name = loadData(fileName)
    BITSIID = int(math.log10(num)) * 4
    BITSINNUM = int(math.log10(len(Data[0]))) * 4

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)
    own = [getReduceVal(d) for d in Data]
    # ``mates`` is indexed by (i, j) tuples, as is ``weights1``.
    weights1, mates = getmatrix(Data, num, window, overlap, order3)
    del order3[:]  # release the expectation table early

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # The diagonal slot is reused for the edge from root node 0.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif (i, j) in weights1:
                Edges.append((i + 1, j + 1, weights1[i, j], i + 1, j + 1))

    solution, refmap2 = Dmst.dmst(num + 1, Edges, 0)
    del own[:]
    del weights1

    FolderName = fileName + '.mstcom/'
    os.makedirs(os.path.dirname(FolderName), exist_ok=True)
    ref_path = FolderName + "ref.txt"
    meta_path = FolderName + "metadata.txt"

    # Context managers close both files even if encoding raises.
    with open(ref_path, 'w') as ref_file, open(meta_path, 'w') as meta_file:
        for entry in refmap2:
            parent, child = entry[0], entry[1]
            if parent == 0:
                ref_file.write(str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                compression2.check_mates(mates[parent - 1, child - 1],
                                         Data[parent - 1], Data[child - 1],
                                         meta_file, parent - 1, child - 1)
        # Trailer line: sequence count followed by every name, '|'-separated.
        ref_file.write("|".join([str(num)] + list(Name)) + "\n")
        meta_file.write(compression2.final_final_string)

    compressionLevel = 9
    with open(ref_path, 'rb') as handle:
        tarbz2contents1 = bz2.compress(handle.read(), compressionLevel)
    with open(meta_path, 'rb') as handle:
        tarbz2contents2 = bz2.compress(handle.read(), compressionLevel)

    with open(ref_path, 'wb') as handle:
        handle.write(tarbz2contents1)
    with open(meta_path, 'wb') as handle:
        handle.write(tarbz2contents2)

    # len() is the number of compressed bytes; sys.getsizeof() would also
    # count the bytes object's own header.
    size3_72 = len(tarbz2contents1) + len(tarbz2contents2)
    print('compressed size ' + str(size3_72))
def compressDataExpectation(fileName, window, opverlap, output_file2,
                            output_file3):
    """Compress ``fileName`` and log the compressed size and elapsed time.

    Same pipeline as the three-argument variant (load, build edges, run
    ``Dmst.dmst``, write ``ref.txt`` / ``metadata.txt``, bz2-compress them in
    place), plus two log lines and a reset of the ``compression2`` module
    state at the end.

    Args:
        fileName: Input path; ``<fileName>.mstcom`` is the output folder.
        window: Forwarded to ``getmatrix``.
        opverlap: Overlap forwarded to ``getmatrix``.  The misspelt name is
            part of the signature and is kept for existing callers.
        output_file2: Writable text file; receives ``"<fileName> <bytes>\\n"``.
        output_file3: Writable text file; receives
            ``"<fileName> <seconds>\\n"``.

    NOTE(review): several functions in this file share the name
    ``compressDataExpectation``; when later ones are present the last
    definition wins at import time.
    """
    global BITSINNUM
    global BITSIID
    import bz2
    import os

    start = time.time()
    gg = fileName  # original input path, used as the key in both logs

    num, Data, Name = loadData(fileName)
    BITSIID = int(math.log10(num)) * 4
    BITSINNUM = int(math.log10(len(Data[0]))) * 4

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)
    own = [getReduceVal(d) for d in Data]
    # The body used to read an undefined ``overlap``; the parameter is
    # ``opverlap``.
    weights1, mates = getmatrix(Data, num, window, opverlap, order3)
    del order3[:]

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # The diagonal slot is reused for the edge from root node 0.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif (i, j) in weights1:
                Edges.append((i + 1, j + 1, weights1[i, j], i + 1, j + 1))

    solution, refmap2 = Dmst.dmst(num + 1, Edges, 0)
    del own[:]
    del weights1

    # os.path.join gives the same paths as the old hard-coded '\\' on
    # Windows and also works where '\\' is not a separator.
    FolderName = fileName + '.mstcom'
    os.makedirs(FolderName, exist_ok=True)
    ref_path = os.path.join(FolderName, "ref.txt")
    meta_path = os.path.join(FolderName, "metadata.txt")

    total_ref = 0
    total_non_ref = 0
    with open(ref_path, 'w') as ref_file, open(meta_path, 'w') as meta_file:
        for entry in refmap2:
            parent, child = entry[0], entry[1]
            if parent == 0:
                total_ref += 1
                ref_file.write(str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                total_non_ref += 1
                compression2.check_mates(mates[parent - 1, child - 1],
                                         Data[parent - 1], Data[child - 1],
                                         meta_file, parent - 1, child - 1)
        # Trailer line: sequence count followed by every name, '|'-separated.
        ref_file.write("|".join([str(num)] + list(Name)) + "\n")
        meta_file.write(compression2.final_final_string)
    print("ref", total_ref, "non", total_non_ref)

    compressionLevel = 9
    with open(ref_path, 'rb') as handle:
        tarbz2contents1 = bz2.compress(handle.read(), compressionLevel)
    with open(meta_path, 'rb') as handle:
        tarbz2contents2 = bz2.compress(handle.read(), compressionLevel)
    end = time.time()

    # len() is the number of compressed bytes; sys.getsizeof() would also
    # count the bytes object's own header.
    size3_72 = len(tarbz2contents1) + len(tarbz2contents2)
    output_file2.write(gg + ' ' + str(size3_72) + '\n')
    output_file2.flush()
    output_file3.write(gg + ' ' + str(end - start) + '\n')
    output_file3.flush()  # previously output_file2 was flushed twice instead

    with open(ref_path, 'wb') as handle:
        handle.write(tarbz2contents1)
    with open(meta_path, 'wb') as handle:
        handle.write(tarbz2contents2)

    # Reset the compression2 module-level buffers for the next input.
    compression2.MAP_FOR_SYMBOL2 = {}
    compression2.final_final_string = ''
    compression2.final_bit = bytearray()
    compression2.final_bit2 = bytearray()
    compression2.final_final_string2 = ''
def compressDataExpectation(fileName):
    """Compress ``fileName`` into ``<fileName>.mstcom/`` and log the zip size.

    Steps, in order:
      1. Load the data and build ``MAP_FOR_SYMBOL2``: one fixed-width binary
         code per letter flagged in the 26-bit ``different_characters`` mask,
         followed by codes for ``'-'``, ``'#'``, ``'$'`` and ``'?'``.
      2. Build the edge list (window 5, overlap 3 are passed to
         ``getmatrix``) and run ``Dmst.dmst`` with node ``0`` as root.
      3. Write root children verbatim to ``sifatscompression.txt`` and encode
         the others through ``compression2.check_mates`` into ``fial``.
      4. Zip both files and append ``"<fileName> <zip bytes>\\n"`` to the
         module-level ``output_file2``.
      5. Reset the module globals and the ``compression2`` buffers.

    Args:
        fileName: Input path; also the prefix of the output folder.

    NOTE(review): ``output_file2`` and ``zipfile`` are not defined in this
    function and must exist at module level.  Several functions in this file
    share this name; the last definition wins at import time.
    """
    global BITSIID
    global BITSINNUM
    global MAP_FOR_SYMBOL2
    global PER_CHARACTER_BIT
    global PER_CHARACTER_BIT2
    import os

    source_name = fileName  # logged at the end; fileName is not reassigned

    num, Data, different_characters, Name = loadData(fileName)
    BITSINNUM = int(math.log2(len(Data[0]))) + 1
    BITSIID = int(math.log2(num)) + 1

    # 26-character bit string, reversed so index k is tested for chr('a' + k).
    different_characters = format(different_characters, '026b')[::-1]

    # Four extra codes are reserved on top of the letters present.
    DF = 4 + different_characters.count('1')
    PER_CHARACTER_BIT = math.ceil(math.log2(DF))
    PER_CHARACTER_BIT2 = math.ceil(math.log2(DF))
    code_format = '0' + str(PER_CHARACTER_BIT2) + 'b'

    next_code = 0
    for k, flag in enumerate(different_characters):
        if flag == '1':
            MAP_FOR_SYMBOL2[chr(ord('a') + k)] = format(next_code, code_format)
            next_code += 1
    for symbol in ('-', '#', '$', '?'):
        code = format(next_code, code_format)
        MAP_FOR_SYMBOL2[symbol] = code
        print(symbol, code)
        next_code += 1
    compression2.MAP_FOR_SYMBOL2 = MAP_FOR_SYMBOL2
    print("GENE NUMBER ", num)

    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)
    own = [getReduceVal(d) for d in Data]
    weights1, mates = getmatrix(Data, num, 5, 3, order3)

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # The diagonal slot is reused for the edge from root node 0.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif weights1[i][j] != -1:  # -1 means no usable edge
                Edges.append((i + 1, j + 1, weights1[i][j], i + 1, j + 1))
    solution, refmap2 = Dmst.dmst(num + 1, Edges, 0)

    FolderName = fileName + '.mstcom/'
    os.makedirs(os.path.dirname(FolderName), exist_ok=True)
    ref_path = FolderName + "sifatscompression.txt"
    diff_path = FolderName + "fial"

    # Both widths are halved, rounding up, before being written as 32-bit
    # headers.
    datar_index = (BITSINNUM + 1) // 2
    koy_number_data_index = (BITSIID + 1) // 2
    compression2.final_final_string += different_characters
    compression2.final_final_string += format(datar_index, '032b')
    compression2.final_final_string += format(koy_number_data_index, '032b')

    check = 0  # re-summed tree cost, compared with ``solution`` below
    total_ref = 0
    total_non_ref = 0
    # Mode 'w' already truncates, so no separate seek/truncate pass is needed.
    with open(ref_path, 'w') as ref_file, open(diff_path, 'w') as diff_file:
        for entry in refmap2:
            parent, child = entry[0], entry[1]
            if parent == 0:
                check += own[child - 1]
                total_ref += 1
                ref_file.write(str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                check += weights1[parent - 1][child - 1]
                total_non_ref += 1
                compression2.check_mates(mates[parent - 1][child - 1],
                                         Data[parent - 1], Data[child - 1],
                                         datar_index, koy_number_data_index,
                                         diff_file, parent - 1, child - 1)
        # Trailer line: sequence count followed by every name, '|'-separated.
        ref_file.write("|".join([str(num)] + list(Name)) + "\n")
        compression2.write_in_binary_in_file(num, koy_number_data_index,
                                             ref_file)
        diff_file.write(compression2.final_final_string)

    print("ref", total_ref, "non", total_non_ref)
    if check == solution:
        print("now ok ")
    else:
        print("still problem", solution, check)

    size1 = 0
    for path, suffix in ((ref_path, "sifatmast.zip"),
                         (diff_path, "fialmast.zip")):
        archive_path = path + suffix
        with zipfile.ZipFile(archive_path, "w",
                             zipfile.ZIP_DEFLATED) as archive:
            archive.write(path)
        size1 += os.path.getsize(archive_path)

    # The original rebuilt this label by splitting ref_path on '/' and
    # trimming '.mstcom/', which yields the input path again.
    output_file2.write(source_name + ' ' + str(size1) + '\n')
    output_file2.flush()

    # NOTE(review): the collapsed source does not show the indentation of
    # these resets; they are kept inside the function because every name is
    # declared global above -- confirm.
    BITSINNUM = 12
    BITSIID = 12
    PER_CHARACTER_BIT = 0
    PER_CHARACTER_BIT2 = 0
    MAP_FOR_SYMBOL2 = {}
    compression2.MAP_FOR_SYMBOL2 = {}
    compression2.final_final_string = ''
    compression2.final_bit = bytearray()
    compression2.final_bit2 = bytearray()
    compression2.final_final_string2 = ''
def compressDataExpectation(fileName):
    """Compress ``fileName`` into two files in the current directory.

    Steps, in order:
      1. Load the data and build ``MAP_FOR_SYMBOL2``: one fixed-width binary
         code per letter flagged in the 26-bit ``different_characters`` mask,
         followed by codes for ``'-'``, ``'#'`` and ``'$'``.
      2. Build the edge list (window 7, overlap 3 are passed to
         ``getmatrix``) and run ``Dmst.dmst`` with node ``0`` as root.
      3. Write root children verbatim to ``sifatscompression.txt`` and encode
         the others through ``compression2.check_mates`` into ``fial.txt``.
      4. Print the ref / non-ref counts and whether the re-summed tree cost
         matches the value returned by ``Dmst.dmst``.

    Args:
        fileName: Input path passed to ``loadData``.

    NOTE(review): ``MAP_FOR_SYMBOL2`` is mutated but never rebound here, so
    it must already exist at module level.  This is the last function of this
    name in the visible source, so it is the one bound after import.
    """
    global BITSIID
    global BITSINNUM
    global PER_CHARACTER_BIT
    global PER_CHARACTER_BIT2

    num, Data, different_characters, Name = loadData(fileName)
    different_characters = format(different_characters, '026b')
    print("uck", different_characters)
    # Data[0], not Data[1]: the rest of the function takes its length from
    # the first sequence, and Data[1] fails when only one was loaded.
    print(num, len(Data[0]))

    BITSINNUM = int(math.log2(len(Data[0]))) + 1
    BITSIID = int(math.log2(num)) + 1

    # Reversed so index k is tested for chr('a' + k).
    different_characters = different_characters[::-1]

    # Four extra codes are reserved on top of the letters present.
    DF = 4 + different_characters.count('1')
    PER_CHARACTER_BIT = math.ceil(math.log2(DF))
    PER_CHARACTER_BIT2 = math.ceil(math.log2(DF))
    code_format = '0' + str(PER_CHARACTER_BIT2) + 'b'

    next_code = 0
    for k, flag in enumerate(different_characters):
        if flag == '1':
            MAP_FOR_SYMBOL2[chr(ord('a') + k)] = format(next_code, code_format)
            next_code += 1
    for symbol in ('-', '#', '$'):
        code = format(next_code, code_format)
        MAP_FOR_SYMBOL2[symbol] = code
        print(symbol, code)
        next_code += 1
    compression2.MAP_FOR_SYMBOL2 = MAP_FOR_SYMBOL2
    print("GENE NUMBER ", num)

    totla_mem = len(Data[0]) * 3 * len(Data)
    prob = getProbability(Data, 1)
    order3 = getExpectations_2(Data, prob, 1)
    own = [getReduceVal(d) for d in Data]
    weights1, mates = getmatrix(Data, num, 7, 3, order3)

    Edges = []
    for i in range(num):
        for j in range(num):
            if i == j:
                # The diagonal slot is reused for the edge from root node 0.
                Edges.append((0, i + 1, own[i], 0, i + 1))
            elif weights1[i][j] != -1:  # -1 means no usable edge
                Edges.append((i + 1, j + 1, weights1[i][j], i + 1, j + 1))
    solution, refmap2 = Dmst.dmst(num + 1, Edges, 0)
    New_mem2 = solution + BITSIID * num
    print("--->", New_mem2 / totla_mem)

    # Both widths are halved, rounding up, before being written as 32-bit
    # headers.
    datar_index = (BITSINNUM + 1) // 2
    koy_number_data_index = (BITSIID + 1) // 2
    compression2.final_final_string += different_characters
    compression2.final_final_string += format(datar_index, '032b')
    compression2.final_final_string += format(koy_number_data_index, '032b')
    compression2.final_final_string2 += compression2.get_binary_in_file(
        num, koy_number_data_index)

    check = 0  # re-summed tree cost, compared with ``solution`` below
    total_ref = 0
    total_non_ref = 0
    # Mode 'w' already truncates, so no separate seek/truncate pass is needed.
    with open("sifatscompression.txt", 'w') as ref_file, \
            open("fial.txt", 'w') as diff_file:
        for entry in refmap2:
            parent, child = entry[0], entry[1]
            print(parent, child)
            if parent == 0:
                check += own[child - 1]
                total_ref += 1
                ref_file.write(str(child - 1) + ',' + Data[child - 1] + "\n")
            else:
                check += weights1[parent - 1][child - 1]
                total_non_ref += 1
                compression2.check_mates(mates[parent - 1][child - 1],
                                         Data[parent - 1], Data[child - 1],
                                         datar_index, koy_number_data_index,
                                         diff_file, parent - 1, child - 1)
        # Trailer line: sequence count followed by every name, '|'-separated.
        ref_file.write("|".join([str(num)] + list(Name)) + "\n")
        compression2.write_in_binary_in_file(num, koy_number_data_index,
                                             ref_file)
        diff_file.write(compression2.final_final_string)

    print("habijabi", total_ref, "non", total_non_ref)
    if check == solution:
        print("now ok ")
    else:
        print("still problem", solution, check)