def scheme6(kmer):
    '''
    Return the encoded size of ``kmer``, in bits, under scheme 6.

    Pipeline (as described by the original notes): 3 bit alphabet,
    MTF, then a fixed Huffman-style prefix code:

        0 => 1, 1 => 01, 2 => 000, 3 => 0011, 4 => 0010

    The MTF step is ``mtf.mtf_n``, noted as a modified MTF that does not
    push to front when the base is N.

    Only the length of the encoding is computed; no bit string is built.
    A transformed symbol outside 0..4 raises ``KeyError``.
    '''
    # Bit length of the prefix code assigned to each MTF output symbol.
    code_bits = {
        0: 1,
        1: 2,
        2: 3,
        3: 4,
        4: 4,
    }

    ranks = mtf.mtf_n(kmer)
    return sum(code_bits[rank] for rank in ranks)
def scheme5(kmer):
    '''
    Return the encoded size of ``kmer``, in bits, under scheme 5.

    Pipeline (as described by the original notes): 3 bit alphabet,
    BWT, MTF, then a fixed Huffman-style prefix code:

        0 => 1, 1 => 01, 2 => 000, 3 => 0011, 4 => 0010

    A constant 6 bits is added for the eol position. The MTF step is
    ``mtf.mtf_n``, noted as a modified MTF that does not push to front
    when the base is N.

    Only the length of the encoding is computed; no bit string is built.
    A transformed symbol outside 0..4 raises ``KeyError``.
    '''
    # Bit length of the prefix code assigned to each MTF output symbol.
    code_bits = {
        0: 1,
        1: 2,
        2: 3,
        3: 4,
        4: 4,
    }
    # Fixed overhead charged for storing the eol position.
    eol_position_bits = 6

    # The eol index itself is not needed for the size estimate; only its
    # fixed storage cost is counted.
    bwt_symbols, _eol_idx = bwt.eol_format(bwt.bwt(kmer))
    ranks = mtf.mtf_n(bwt_symbols)

    return eol_position_bits + sum(code_bits[rank] for rank in ranks)