def send(self, receiver, P, maxNoise):
    """Compress ``self.text``, encode it with a linear code and deliver it as JSON.

    Size and entropy figures are printed for the original and the compressed
    text, followed by progress messages for each stage.  The string handed to
    ``receiver.receive`` is produced by ``dumps`` and names the compression
    algorithm ("LZ78"), the code ("linear" together with ``P``) and the
    base64 form of the encoded data.

    Args:
        receiver: Object whose ``receive`` method is called with the JSON string.
        P: First argument to ``LC``; also echoed in the JSON under ``code``.
        maxNoise: Second argument to ``LC``.
    """
    # Original text: one character is counted as one byte.
    original_report = '\nOriginal Size: {} bytes\nOriginal Text Entropy: {}'
    print(original_report.format(len(self.text), calculateEntropy(self.text)))

    print("\nCompressing with LZ78...")

    # Compressed text: eight bits are counted as one byte.
    packed = compress(self.text)
    compressed_report = '\nCompressed Size: {} bytes\nCompressed Text Entropy: {}'
    print(compressed_report.format(len(packed) / 8, calculateEntropy(packed)))

    # Linear-code stage.
    print("\nEncoding with Linear Code...\n")
    protected = LC(P, maxNoise).encode(packed)

    # Base64 stage.
    print("\nEncoding with base64...\n")
    wrapped = b64encode(protected)

    code_info = {"name": "linear", "P": P}
    message = dumps({
        "compression_algorithm": "LZ78",
        "code": code_info,
        "base64encoded": wrapped,
    })

    print("Sending JSON...\n")
    receiver.receive(message)
def getCompSizes(inFile, stringSize, maxOffsets=1000000):
    """Run ``compress`` on every ``stringSize``-character window of ``inFile``.

    A window is read at each offset 0, 1, 2, ... and passed to ``compress``.
    Scanning stops once fewer than ``stringSize`` bytes (per
    ``os.path.getsize``) remain past the offset, or after ``maxOffsets``
    windows, whichever comes first.

    Args:
        inFile: Path of the file to sample.
        stringSize: Number of characters read for each window.
        maxOffsets: Upper bound on the number of windows.  Defaults to
            1000000, the limit that used to be hard-coded.

    Returns:
        list: The value returned by ``compress`` for each window, in offset
        order.  Empty when the file is shorter than ``stringSize``.
    """
    sizes = []
    # The context manager closes the file even if read() or compress() raises.
    with open(inFile) as f:
        fileSize = os.path.getsize(inFile)
        # Offsets 0 .. fileSize - stringSize are the ones where a full window
        # still fits; range() of a non-positive bound is simply empty.
        lastStart = min(maxOffsets, fileSize - stringSize + 1)
        for offset in range(lastStart):
            f.seek(offset)
            sizes.append(compress(f.read(stringSize)))
    return sizes
l=string.printable for i in range(n): s+=l[rand.randrange(len(l))] return s #f=open(sys.argv[1]) #s=f.read(1000) l=string.printable s='' for j in range(10000): s+=l[rand.randrange(len(l))] res,x = [],[] for i in range(1,10000): res+=[compress(s[:i])] x+=[i] #f.close() #plt.plot(x,x,'red',label='linear') #plt.plot(x,res, label='random') plt.plot(x,[x[i]/float(res[i]) for i in range(len(x))], label='random') plt.xlabel('Uncompressed String Length') plt.ylabel('Compression ratio') plt.legend(bbox_to_anchor=(0.3,0.9)) plt.show()
from lz78 import compress
import sys
import random as rand
import matplotlib.pyplot as plt
from math import log

# Compare the compress() result for growing prefixes of genome.fa with the
# result for random text over the same four-letter alphabet.
# Usage: pass the maximum string length as the first command-line argument.
alpha = 'agct'
maxString = int(sys.argv[1])

r = 'ag'  # random string; starts with two characters like the genome prefix
randRes, res, x = [], [], []

# `with` closes the genome file even if compress() raises part-way through.
with open('genome.fa') as f:
    s = f.read(2)
    for i in range(1, int(log(maxString, 2))):
        # Each round extends both strings by 2**i characters.
        s += f.read(2**i)
        r += ''.join(alpha[rand.randrange(4)] for _ in range(2**i))
        res.append(compress(s))
        randRes.append(compress(r))
        x.append(len(s))

plt.plot(x, x, label='linear', color='red')
# Every series gets a label; otherwise the legend would list only 'linear'.
plt.plot(x, res, label='genome')
plt.plot(x, randRes, label='random')
plt.xlabel('Uncompressed String Length')
plt.ylabel('Compressed Length')
plt.legend(bbox_to_anchor=(0.3, 0.9))
# Plot length / compress(...) for growing prefixes of English text, genome
# data and random printable characters.
# Usage: pass the maximum string length as the first command-line argument.
r, s, h = '&d', 'aa', 'ac'  # two-character starting strings: random, English, genome
compRand, compUni, genome, x = [], [], [], []
l = string.printable
maxString = int(sys.argv[1])

# `with` closes both input files even if compress() raises part-way through.
with open('englishText.txt') as f, open('genome.fa') as g:
    for i in range(1, int(log(maxString, 2))):
        # Each round extends all three strings by 2**i characters.
        s += f.read(2**i)
        h += g.read(2**i)
        r += ''.join(l[rand.randrange(len(l))] for _ in range(2**i))
        compUni.append(compress(s))
        compRand.append(compress(r))
        genome.append(compress(h))
        x.append(len(s))

# Ratio = uncompressed length / compress() result, for each series.
plt.plot(x, [float(n) / c for n, c in zip(x, genome)], label='genome')
plt.plot(x, [float(n) / c for n, c in zip(x, compUni)], label='average')
plt.plot(x, [float(n) / c for n, c in zip(x, compRand)], label='random')
#plt.plot(x,[sqrt(n/2) for n in x],label='best case')
#plt.plot(x,map(lambda z: sqrt(2*z),x), label='uniform')
plt.xlabel('String Length')
plt.ylabel('Compression Ratio')
from lz78 import compress
import string
import matplotlib.pyplot as plt
from math import sqrt

# Plot the compress() result for strings made of a single repeated character
# against sqrt(2n), where n is the string length.
s = 'aa'
compUni, x = [], []
for i in range(1, 13):
    # Grow the all-'a' string by 2**i characters each round.
    s += 2**i * 'a'
    compUni.append(compress(s))
    x.append(len(s))

# Build a real list: under Python 3 map() returns a lazy iterator, which
# matplotlib cannot turn into an array of x values.
plt.plot([sqrt(2 * n) for n in x], compUni)
plt.ylabel('Compressed Size')
plt.xlabel('sqrt(2n)')
plt.show()
from lz78 import compress
import sys
import matplotlib.pyplot as plt

# Plot length / compress(...) for every prefix of the input file, from 3 up
# to 10002 characters.  The file path is the first command-line argument.
y = []
# `with` closes the file even if compress() raises part-way through.
with open(sys.argv[1]) as f:
    s = f.read(2)
    for i in range(10000):
        ch = f.read(1)
        if not ch:
            # End of file: the prefix can no longer grow, so stop instead of
            # recompressing the same string for the remaining iterations.
            break
        s += ch
        y.append(len(s) / float(compress(s)))

plt.plot(y)
plt.show()
import time
import string
import random as r
from lz78 import compress
import matplotlib.pyplot as plt
from math import log

# Time compress() on random printable strings of increasing length.
l = string.printable

# Initialize f to a 1000 letter string.
f = ''.join(l[r.randrange(len(l))] for _ in range(1000))

x = []
res = []
for i in range(12):
    # Extend the string by 2**i * 1000 random characters.
    f += ''.join(l[r.randrange(0, len(l))] for _ in range(2**i * 1000))
    # Record the length actually being compressed.  The string is cumulative,
    # so the increment 2**i * 1000 is only half of the true length.
    x.append(len(f))
    t = time.time()
    compress(f)
    res.append(time.time() - t)

plt.plot(x, res)
plt.ylabel('Time')
plt.xlabel('String Length')
#plt.plot(range(len(res)),map(lambda z: log(z,2),res))
plt.show()
import string
import random as r
from lz78 import compress
import matplotlib.pyplot as plt
from math import log

# Plot stringlength / compress(...) for random strings drawn from
# progressively larger prefixes of string.printable.
stringlength = 10000
alphabet = string.printable
step = len(alphabet) // 20


def ratio_for(size):
    """Return stringlength / compress(...) for a random string over the first `size` symbols."""
    sample = ''.join(alphabet[r.randrange(size)] for _ in range(stringlength))
    return stringlength / float(compress(sample))


# Alphabet sizes step, 2*step, ..., 20*step.
xaxis = [multiple * step for multiple in range(1, 21)]
res = [ratio_for(size) for size in xaxis]

plt.plot(xaxis, res)
plt.xlabel('Alphabet Size')
plt.ylabel('Compression Ratio')
# plt.yscale('log')  # optional: logarithmic y axis
plt.show()
import string
import random as r
from lz78 import compress
import matplotlib.pyplot as plt
from math import log

# For 20 alphabet sizes taken from string.printable, build a random string
# of fixed length and plot its length divided by the compress() result.
stringlength = 10000
symbols = string.printable
sizes, ratios = [], []

for multiple in range(1, 21):
    size = multiple * (len(symbols) // 20)
    sizes.append(size)
    # Draw every character from the first `size` printable symbols.
    text = ''.join(symbols[r.randrange(size)] for _ in range(stringlength))
    ratios.append(stringlength / float(compress(text)))

plt.plot(sizes, ratios)
plt.xlabel('Alphabet Size')
plt.ylabel('Compression Ratio')
# plt.yscale('log')  # optional: logarithmic y axis
plt.show()
from lz78 import compress
import string
import matplotlib.pyplot as plt
from math import sqrt

# Plot the compress() result for all-'a' strings of growing length against
# sqrt(2n), where n is the string length.
s = 'aa'
compUni, x = [], []
for i in range(1, 13):
    # Append 2**i more 'a' characters each round.
    s += 2**i * 'a'
    compUni.append(compress(s))
    x.append(len(s))

# A list comprehension replaces map(): on Python 3 map() is a lazy iterator
# and matplotlib cannot use it as the x data.
plt.plot([sqrt(2 * n) for n in x], compUni)
plt.ylabel('Compressed Size')
plt.xlabel('sqrt(2n)')
plt.show()
# Plot length / compress(...) for growing prefixes of English text, genome
# data and random printable characters.
# Usage: pass the maximum string length as the first command-line argument.
r, s, h = '&d', 'aa', 'ac'  # two-character starting strings: random, English, genome
compRand, compUni, genome, x = [], [], [], []
l = string.printable
maxString = int(sys.argv[1])

# Both files are closed by the `with` block, even when compress() raises.
with open('englishText.txt') as f, open('genome.fa') as g:
    for i in range(1, int(log(maxString, 2))):
        # Each round extends all three strings by 2**i characters.
        s += f.read(2**i)
        h += g.read(2**i)
        r += ''.join(l[rand.randrange(len(l))] for _ in range(2**i))
        compUni.append(compress(s))
        compRand.append(compress(r))
        genome.append(compress(h))
        x.append(len(s))

# Ratio = uncompressed length / compress() result, for each series.
plt.plot(x, [float(n) / c for n, c in zip(x, genome)], label='genome')
plt.plot(x, [float(n) / c for n, c in zip(x, compUni)], label='average')
plt.plot(x, [float(n) / c for n, c in zip(x, compRand)], label='random')
#plt.plot(x,[sqrt(n/2) for n in x],label='best case')
#plt.plot(x,map(lambda z: sqrt(2*z),x), label='uniform')
plt.xlabel('String Length')
plt.ylabel('Compression Ratio')
import matplotlib.pyplot as plt
from math import log, sqrt

# Plot the compress() result for growing prefixes of English text and of
# random printable characters, next to the n and sqrt(2n) reference curves.
# Usage: pass the maximum string length as the first command-line argument.
r, s = '&d', 'aa'  # two-character starting strings: random, English
compRand, compUni, x = [], [], []
l = string.printable
maxString = int(sys.argv[1])

# `with` closes the text file even if compress() raises part-way through.
with open('englishText.txt') as f:
    for i in range(1, int(log(maxString, 2))):
        # Each round extends both strings by 2**i characters.
        s += f.read(2**i)
        r += ''.join(l[rand.randrange(len(l))] for _ in range(2**i))
        compUni.append(compress(s))
        compRand.append(compress(r))
        x.append(len(s))

plt.plot(x, compUni, 'g--', label='average')
plt.plot(x, compRand, 'b-.', label='random')
plt.plot(x, x, label='linear', color='red')
# A list is required here: Python 3's map() returns a lazy iterator that
# matplotlib cannot plot.
plt.plot(x, [sqrt(2 * n) for n in x], 'k:', label='optimal', markersize=20)
plt.xlabel('Uncompressed String Length')
plt.ylabel('Compressed Length')
plt.legend(bbox_to_anchor=(0.3, 0.9))
import sys
import random as rand
import matplotlib.pyplot as plt
from math import log

# Compare the compress() result for growing prefixes of genome.fa with the
# result for random text over the same four-letter alphabet.
# Usage: pass the maximum string length as the first command-line argument.
alpha = 'agct'
maxString = int(sys.argv[1])

r = 'ag'  # random string; starts with two characters like the genome prefix
randRes, res, x = [], [], []

# The genome file is closed by `with`, even when compress() raises.
with open('genome.fa') as f:
    s = f.read(2)
    for i in range(1, int(log(maxString, 2))):
        # Each round extends both strings by 2**i characters.
        s += f.read(2**i)
        r += ''.join(alpha[rand.randrange(4)] for _ in range(2**i))
        res.append(compress(s))
        randRes.append(compress(r))
        x.append(len(s))

plt.plot(x, x, label='linear', color='red')
# Label every series so the legend does not list 'linear' alone.
plt.plot(x, res, label='genome')
plt.plot(x, randRes, label='random')
plt.xlabel('Uncompressed String Length')
plt.ylabel('Compressed Length')
plt.legend(bbox_to_anchor=(0.3, 0.9))
plt.show()
import matplotlib.pyplot as plt
from math import log, sqrt

# Plot the compress() result for growing prefixes of English text and of
# random printable characters, next to the n and sqrt(2n) reference curves.
# Usage: pass the maximum string length as the first command-line argument.
r, s = '&d', 'aa'  # two-character starting strings: random, English
compRand, compUni, x = [], [], []
l = string.printable
maxString = int(sys.argv[1])

# The text file is closed by `with`, even when compress() raises.
with open('englishText.txt') as f:
    for i in range(1, int(log(maxString, 2))):
        # Each round extends both strings by 2**i characters.
        s += f.read(2**i)
        r += ''.join(l[rand.randrange(len(l))] for _ in range(2**i))
        compUni.append(compress(s))
        compRand.append(compress(r))
        x.append(len(s))

plt.plot(x, compUni, 'g--', label='average')
plt.plot(x, compRand, 'b-.', label='random')
plt.plot(x, x, label='linear', color='red')
# Materialise the curve as a list; a Python 3 map() object is a lazy
# iterator and cannot be plotted.
plt.plot(x, [sqrt(2 * n) for n in x], 'k:', label='optimal', markersize=20)
plt.xlabel('Uncompressed String Length')
import string
import sys
import random as rand
import matplotlib.pyplot as plt
from math import log, sqrt

# Plot length / compress(...) for genome prefixes that grow one character at
# a time.  The maximum length is the first command-line argument.
r = 'ac'  # two-character starting string
genome, x = [], []
maxString = int(sys.argv[1])

# `with` closes the genome file even if compress() raises part-way through.
with open('genome.fa') as g:
    for i in range(1, maxString):
        ch = g.read(1)
        if not ch:
            # End of file: stop rather than recompress an unchanged string
            # for every remaining iteration.
            break
        r += ch
        genome.append(compress(r))
        x.append(len(r))

plt.plot(x, [float(n) / c for n, c in zip(x, genome)], label='genome')
#plt.plot(x,[sqrt(n/2) for n in x],label='uniform')
plt.xlabel('String Length')
plt.ylabel('Compression Ratio')
plt.legend(bbox_to_anchor=(0.3, 0.9))