def huffman_coding(omega):
    """Build a Huffman-style code book from the symbol frequencies of ``omega``.

    Every alphabet symbol starts as its own group ``[probability, index]`` in
    a min-heap.  The two smallest groups are merged repeatedly; each merge
    adds one more bit to every symbol of both groups.  Bits are collected
    leaf-to-root and reversed at the end.

    Args:
        omega: Sequence of symbols to analyse (must support ``len``).

    Returns:
        A ``ds.CodeBook`` into which ``(i, code)`` was inserted for every
        alphabet position ``i``; the key is the index, not the symbol itself.

    NOTE(review): with a single distinct symbol no merge takes place, so that
    symbol ends up with the empty code -- confirm callers can cope with this.
    """
    total = len(omega)
    groups = ds.Heap()
    alphabet = util.word_alphabet(omega)
    letter_index = util.dictionary(alphabet)
    bits = [""] * len(alphabet)
    counts = util.letter_count(omega, letter_index, alphabet)
    word_prob = [count / total for count in counts]

    # One heap entry per symbol: [probability, alphabet index].
    for index, probability in enumerate(word_prob):
        groups.insert([probability, index])

    # Encode least probable characters with one further bit.
    while groups.size > 1:
        lighter = groups.getMin()
        groups.extractMin()
        heavier = groups.getMin()
        groups.extractMin()

        # The merged group is stored in `heavier`: summed probability first,
        # followed by the member symbol indices of both groups.
        heavier[0] += lighter[0]
        for symbol in heavier[1:]:
            bits[symbol] += "0"
        for symbol in lighter[1:]:
            bits[symbol] += "1"
        heavier.extend(lighter[1:])
        groups.insert(heavier)

    # Bits were appended leaf-to-root, so each code is reversed before it is
    # stored in the code book.
    code_words = ds.CodeBook()
    for index, collected in enumerate(bits):
        code_words.insert(index, collected[::-1])
    return code_words
def real_arithmetic_coding(omega, word):
    """Arithmetic-code ``word`` using the letter statistics of ``omega``.

    The unit interval is split into one sub-interval per alphabet letter,
    proportional to that letter's relative frequency in ``omega``.  For every
    letter of ``word`` the current interval is narrowed to that letter's
    sub-interval and split again in the same proportions.

    Args:
        omega: Sequence of symbols that defines the alphabet and the letter
            probabilities.
        word: Non-empty sequence of symbols to encode; each symbol must be a
            key of the mapping returned by ``util.dictionary``.

    Returns:
        A pair ``(bits_low, bits_high)``: the lower and upper bound of the
        final interval, each converted with ``util.decimal2binary``.

    Raises:
        IndexError: if ``word`` is an empty string or list (there is no last
            letter to look up).
    """
    total = len(omega)
    alphabet = util.word_alphabet(omega)
    symbol_count = len(alphabet)
    letter_index = util.dictionary(alphabet)
    counts = util.letter_count(omega, letter_index, alphabet)
    word_prob = [count / total for count in counts]

    def subdivide(lower, width):
        # Split the interval starting at `lower` with length `width` into
        # consecutive [start, end] pieces, one per alphabet letter.
        pieces = []
        cursor = lower
        for probability in word_prob[:symbol_count]:
            start = cursor
            cursor += probability * width
            pieces.append([start, cursor])
        return pieces

    # Start from the whole unit interval.
    prob_ranges = subdivide(0, 1)

    # Narrow the interval for every letter except the last one.
    for letter in word[:-1]:
        low, high = prob_ranges[letter_index[letter]]
        prob_ranges = subdivide(low, high - low)

    # The last letter only selects its sub-interval; no further split needed.
    final_low, final_high = prob_ranges[letter_index[word[-1]]]
    bits_low = util.decimal2binary(final_low)
    bits_high = util.decimal2binary(final_high)
    return bits_low, bits_high
def MTF_encode(omega, Lambda=None):
    """Move-to-front encode ``omega``.

    Each symbol is replaced by its current position in a working alphabet,
    after which that symbol is moved to the front of the working alphabet.

    Args:
        omega: Iterable of symbols to encode.
        Lambda: Optional initial alphabet as a sequence of symbols (list,
            tuple, string, ...).  It is never modified.  When omitted, the
            sorted result of ``util.word_alphabet(omega)`` is used.

    Returns:
        A pair ``(eta, Lambda)``: ``eta`` is the list of emitted indices and
        ``Lambda`` is the initial alphabet that was used (the caller's object
        when one was passed in).

    Raises:
        ValueError: if a symbol of ``omega`` is not part of the alphabet.
    """
    # Identity test: `== None` would be evaluated element-wise (or through a
    # custom __eq__) for array-like alphabets.
    if Lambda is None:
        # No alphabet supplied: compute it on the fly.
        Lambda = sorted(util.word_alphabet(omega))

    # Private mutable working copy; this also lets immutable sequences such
    # as tuples or strings serve as the initial alphabet.
    alphabet = list(Lambda)

    eta = []
    for symbol in omega:
        position = alphabet.index(symbol)
        eta.append(position)
        # Move the symbol just encoded to the front of the alphabet.
        alphabet.insert(0, alphabet.pop(position))
    return eta, Lambda
def Permutation(eta):
    """Compute the counting-sort permutation of ``eta``.

    For every position ``i`` the result holds the slot handed to ``eta[i]``:
    slots are assigned per symbol rank, starting at the number of symbols
    with a smaller rank and increasing by one for each further occurrence of
    the same symbol.

    Args:
        eta: Sequence of symbols.

    Returns:
        A list ``pi`` with ``len(eta)`` entries, ``pi[i]`` being the slot of
        ``eta[i]``.

    NOTE(review): assumes ``util.dictionary`` maps each symbol to its index
    in the sorted alphabet and ``util.letter_count`` returns one occurrence
    count per alphabet entry in that order -- confirm against ``util``.
    """
    ordered_alphabet = sorted(util.word_alphabet(eta))
    rank_of = util.dictionary(ordered_alphabet)
    counts = util.letter_count(eta, rank_of, ordered_alphabet)

    # Compute how many letters occur before each rank in the sorted list.
    next_slot = []
    seen_so_far = 0
    for count in counts:
        next_slot.append(seen_so_far)
        seen_so_far += count

    # Compute the final permutation: hand out the next free slot of the
    # symbol's rank, then advance that rank's slot counter.
    pi = []
    for symbol in eta:
        rank = rank_of[symbol]
        pi.append(next_slot[rank])
        next_slot[rank] += 1
    return pi
def shannon_fano_elias_coding(omega):
    """Build a Shannon-Fano-Elias code book for the symbols of ``omega``.

    For each alphabet letter with relative frequency ``p`` the midpoint
    ``F = (cumulative probability up to and including the letter) - p / 2``
    is converted with ``util.decimal2binary``, and the code is made exactly
    ``ceil(log2(1 / p)) + 1`` bits long.

    Args:
        omega: Sequence of symbols to analyse (must support ``len``).

    Returns:
        A ``ds.CodeBook`` into which ``(letter, code)`` was inserted for
        every alphabet letter.
    """
    total = len(omega)
    alphabet = util.word_alphabet(omega)
    letter_index = util.dictionary(alphabet)
    counts = util.letter_count(omega, letter_index, alphabet)
    word_prob = [count / total for count in counts]

    cumulative = 0
    notebook = ds.CodeBook()
    for letter, prob in zip(alphabet, word_prob):
        cumulative += prob
        midpoint = cumulative - 0.5 * prob

        # The first two characters of the converted string are dropped
        # (presumably a leading "0." -- TODO confirm in util.decimal2binary).
        fraction_bits = util.decimal2binary(midpoint)[2:]

        # math.log2 rather than math.log(x, 2): the two-argument form divides
        # two rounded logarithms and may come out slightly above an exact
        # integer for power-of-two probabilities, which would make ceil()
        # return one bit too many.
        encoding_length = math.ceil(math.log2(1 / prob)) + 1

        # Pad with zeros or truncate so the code has exactly the wanted length.
        code = fraction_bits.ljust(encoding_length, "0")[:encoding_length]
        notebook.insert(letter, code)
    return notebook