def common_sequence(vector_a, vector_b, index_a=None, index_b=None, T=None):
    """Return the best common subsequence of two vectors as a Sequence.

    "Best" is whatever ``max`` picks among candidate Sequence objects, so it
    follows Sequence's own ordering (presumably by length -- confirm against
    the Sequence class).

    Parameters:
        vector_a, vector_b: indexable collections to compare.
        index_a, index_b: 1-based start positions; leave as None to start at
            the beginning of both vectors.
        T: memo table keyed by ``(index_a, index_b)``; leave as None to have
            one created with ``create_dict()``.
    """
    # Raise the limit once per external call; the recursion itself lives in
    # the helper so the interpreter setting is not rewritten on every frame.
    sys.setrecursionlimit(10**6)
    if index_a is None and index_b is None and T is None:
        index_a, index_b, T = 1, 1, create_dict()
    return _common_sequence_from(vector_a, vector_b, index_a, index_b, T)


def _common_sequence_from(vector_a, vector_b, index_a, index_b, T):
    """Memoized recursion behind common_sequence (indices are 1-based)."""
    if index_a > len(vector_a) or index_b > len(vector_b):
        return Sequence()

    key = (index_a, index_b)
    if key in T:
        return T[key]

    # Seed the entry with an empty Sequence so max() has a baseline.
    T[key] = Sequence()
    head = vector_a[index_a - 1]
    if head == vector_b[index_b - 1]:
        # Matching elements: keep the element and advance in both vectors.
        candidate = Sequence([head]) + _common_sequence_from(
            vector_a, vector_b, index_a + 1, index_b + 1, T)
        T[key] = max(T[key], candidate)
    else:
        # Mismatch: try skipping one element of either vector.
        T[key] = max(
            T[key],
            _common_sequence_from(vector_a, vector_b, index_a + 1, index_b, T),
            _common_sequence_from(vector_a, vector_b, index_a, index_b + 1, T))
    return T[key]
def test_calculate_weight():
    """calculate_weight of a single-'G' Sequence matches WEIGHTS['G']."""
    single_g = Sequence("G")
    expected = Sequence.WEIGHTS['G']
    actual = Sequence.calculate_weight(single_g)
    # Compare with an absolute tolerance of 0.01.
    assert np.isclose(expected, actual, atol=0.01), \
        "Weight returned was unexpected"
def __init__(self, sequences, plotter_args=None, drawer_args=None):
    """Load two FASTA sequences and build the plotter and drawer helpers.

    sequences -- object exposing ``file1`` and ``file2``, each passed to
                 ``Sequence.from_fasta_file``.
    plotter_args -- forwarded unchanged to ``Plotter``.
    drawer_args -- forwarded unchanged to ``Drawer``.
    """
    first = Sequence.from_fasta_file(sequences.file1)
    second = Sequence.from_fasta_file(sequences.file2)
    self.sequences = [first, second]
    self.plotter = Plotter(plotter_args)
    self.drawer = Drawer(drawer_args)
def read_seqs(self, path):
    """Parse a FASTA-style file into a list of Sequence objects.

    Lines starting with ';' are skipped as comments. A line starting with
    '>' opens a new record whose header is the rest of that line with
    surrounding whitespace removed. Every other line is stripped,
    upper-cased and appended to the current record's sequence.

    A record is stored when the next header is reached; the trailing record
    is stored only if it collected at least one sequence line.

    Raises DuplicateSeqNameException when ``self.check_seen_seqname``
    reports that a record's header is already among the parsed results.
    The check now runs for every record, not only the last one.
    """
    results = []
    header = ''
    seq_items = []
    seen_header = False

    def store(name, parts):
        # Reject a repeated name before it is added to the results.
        if self.check_seen_seqname(results, name):
            raise DuplicateSeqNameException(name)
        results.append(Sequence(name, "".join(parts)))

    # The context manager closes the file on every exit path, including
    # the duplicate-name error, so no explicit close() is needed.
    with open(path, 'r') as infile:
        for line in infile:
            if line.startswith(';'):
                continue  # comment line
            if line.startswith('>'):
                if seen_header:
                    store(header, seq_items)
                    seq_items = []
                # Drop only the '>' and let strip() remove the newline, so a
                # final line without a trailing newline keeps its last char.
                header = line[1:].strip()
                seen_header = True
            else:
                seq_items.append(line.strip().upper())

    if seq_items:
        store(header, seq_items)
    return results
def test_is_high_order_arithmetic(self):
    """is_high_order_arithmetic accepts and rejects the expected inputs."""
    plain_cases = ('1 2 4 7', '3 4 6 9', '1 3 5 7 9', '1 2 3 4 5')
    ratio_cases = ('4 14 34 74 154', '1 3 7 15')
    negative_cases = ('1 4 10 15', '1 1 2 3 5 8')

    for raw in plain_cases:
        candidate = Sequence(raw_str=raw)
        self.assertTrue(candidate.is_high_order_arithmetic(with_ratio=False))

    for raw in ratio_cases:
        candidate = Sequence(raw_str=raw)
        self.assertTrue(candidate.is_high_order_arithmetic(with_ratio=True))

    # Negative inputs must be rejected in both modes.
    for raw in negative_cases:
        candidate = Sequence(raw_str=raw)
        self.assertFalse(candidate.is_high_order_arithmetic(with_ratio=False))
        self.assertFalse(candidate.is_high_order_arithmetic(with_ratio=True))
def read_fasta_file(infilename):
    """Read a FASTA file and return its records as a list of Sequence objects.

    Blank lines are ignored. A line starting with '>' begins a new record
    whose ``name`` is the rest of that line; all following lines up to the
    next header are concatenated into the record's ``seq``.

    An input with no header and no sequence data returns an empty list
    instead of a single record with empty name and sequence.
    """
    def make_record(name, chunks):
        record = Sequence()
        record.name = name
        record.seq = "".join(chunks)
        return record

    seqlist = []
    curseqname = ""
    chunks = []  # sequence lines of the record currently being read
    seen_header = False

    # 'with' guarantees the handle is closed even if parsing raises.
    with open(infilename, "r") as infile:
        for raw_line in infile:
            line = raw_line.strip()
            if not line:
                continue
            if line[0] == ">":
                if seen_header:
                    # A new header closes the previous record.
                    seqlist.append(make_record(curseqname, chunks))
                seen_header = True
                curseqname = line[1:]
                chunks = []
            else:
                # A sequence may span multiple lines.
                chunks.append(line)

    # Store the final record, but only if the file contained anything.
    if seen_header or chunks:
        seqlist.append(make_record(curseqname, chunks))
    return seqlist
def test_ctor():
    """Sequence stores a range, list or tuple as a plain list in ``.l``."""
    cases = (
        (range(3), [0, 1, 2]),
        ([4, 1, 2], [4, 1, 2]),
        ((1, 2, 3), [1, 2, 3]),
    )
    for source, expected in cases:
        built = Sequence(source)
        assert built.l == expected
def test_forge_high_order_arithmetic(self):
    """forge_high_order_arithmetic yields the expected continuation strings."""
    plain_seq = Sequence(raw_str='1 2 4 7')
    ratio_seq = Sequence(raw_str='4 14 34 74')
    expected_plain = '11 16 22 29 37 46 56 67 79 92'
    expected_ratio = '154 314 634 1274 2554 5114 10234 20474 40954 81914'

    forged_plain = plain_seq.forge_high_order_arithmetic(with_ratio=False)
    self.assertEqual(forged_plain, expected_plain)

    forged_ratio = ratio_seq.forge_high_order_arithmetic(with_ratio=True)
    self.assertEqual(forged_ratio, expected_ratio)
def test_max_element(self):
    """max_el returns the expected three-item list for each input string."""
    # (expected result, raw input, failure message)
    cases = (
        ([0, 0, 0], "", "Empty Sequence max Elements"),
        ([1, 1, 1], "1", "1 Sequence max Elements"),
        ([2, 1, 2], "1,2", "2 Sequence max Elements"),
        ([5, 3, 8], "8,3,6,5,7", "N number Sequence min Elements"),
    )
    for expected, raw, message in cases:
        self.assertEqual(expected, Sequence().max_el(raw), message)
def test_isWinningPartialSequence(self):
    """Partially filled sequences are never reported as winning.

    Every layout below contains at least one empty (None) slot.
    """
    partial_layouts = (
        [None, None, None, None],
        [piece.PTTTT, None, None, None],
        [None, None, None, piece.PTTTT],
        [piece.PTTTF, None, None, piece.PTTTT],
    )
    for layout in partial_layouts:
        testee = Sequence(layout)
        self.assertFalse(testee.is_winning())
def test_average(self):
    """average_el returns the expected four-item list for each input string."""
    # (expected result, raw input, failure message)
    cases = (
        ([0, 0, 0, 0], "", "Empty Sequence average Elements"),
        ([1, 1, 1, 1], "1", "1 Sequence average Elements"),
        ([2, 1, 8, 4.5], "1,8", "2 Sequence average Elements"),
        ([6, 1, 4, 2], "1,1,1,2,3,4", "N Sequence average Elements"),
    )
    for expected, raw, message in cases:
        self.assertEqual(expected, Sequence().average_el(raw), message)
def extend_seq(input_str):
    """Classify the sequence in ``input_str`` and return its extension.

    The rules are tried in order and the first matching one decides the
    result. When no rule matches, the string 'unknown sequence' is returned.
    """
    this_sequence = Sequence(raw_str=input_str)

    # (predicate, producer) pairs. Lambdas keep every attribute lookup lazy,
    # so a rule is only touched when all earlier rules failed to match.
    # NOTE(review): 'forg_square_seq' looks like a misspelling of
    # 'forge_square_seq' -- confirm against the Sequence class.
    rules = (
        (lambda s: s.is_empty_list(), lambda s: s.empty_seq()),
        (lambda s: s.is_only_one_item(), lambda s: s.only_one_item_seq()),
        (lambda s: s.is_arithmetic(), lambda s: s.forge_arithmetic()),
        (lambda s: s.is_geometric(), lambda s: s.forge_geometric()),
        (lambda s: s.is_fibonacci(), lambda s: s.forge_fibonacci()),
        (lambda s: s.is_square_seq(), lambda s: s.forg_square_seq()),
        (lambda s: s.is_high_order_arithmetic(),
         lambda s: s.forge_high_order_arithmetic()),
        (lambda s: s.is_high_order_arithmetic(with_ratio=True),
         lambda s: s.forge_high_order_arithmetic(with_ratio=True)),
    )
    for matches, produce in rules:
        if matches(this_sequence):
            return produce(this_sequence)
    return 'unknown sequence'
def main(args):
    """Align sequences from a file, either one chosen pair or all pairs.

    Reads the sequences from ``args.sequenceFilename`` and the scoring data
    from ``args.substitutionMatrix``. With ``args.nseq1`` set, only the pair
    (``args.nseq1``, ``args.nseq2``), given as 1-based positions, is
    aligned. Otherwise every unordered pair is aligned and a separator line
    is printed after each alignment. ``args.local`` selects local instead of
    global alignment.
    """
    sequences = list(Sequence.fromFile(args.sequenceFilename))
    score = Score.fromFile(args.substitutionMatrix)
    gap_open = args.openingPenality
    gap_extend = args.extendingPenality
    use_local = args.local

    def align(first, second):
        # Dispatch to the alignment routine selected on the command line.
        if use_local:
            localAlignement(first, second, gap_open, gap_extend,
                            score, args.substitutionMatrix)
        else:
            globalAlignement(first, second, gap_open, gap_extend,
                             score, args.substitutionMatrix)

    if args.nseq1:
        count = len(sequences)
        if not (1 <= args.nseq1 <= count and 1 <= args.nseq2 <= count):
            parser.error("-1 and -2 option must be in range of the number of sequences.")
        align(sequences[args.nseq1 - 1], sequences[args.nseq2 - 1])
        return

    for position, first in enumerate(sequences):
        for second in sequences[position + 1:]:
            align(first, second)
            print("\n" + "#"*80 + "\n")
def get_sequence(self, goal, env):
    """Build a Sequence of symbols for ``goal`` from ``self.language_matrix``.

    For every index returned by ``get_goal_indices(goal)`` the matching
    slice of the language matrix is scanned from its largest entry downwards
    until an entry whose position is not yet taken is found.

    goal -- passed to ``get_goal_indices``.
    env -- provides ``env.symbols``, used to map symbol indices to symbols.

    Returns a Sequence built from the selected symbols.
    """
    goal_indices = get_goal_indices(goal)
    # One slot per goal index; slots stay None until a symbol is assigned.
    symbols = [None] * len(goal_indices)
    # Positions that already received a symbol.
    positions = []
    for index in goal_indices:
        pos_found = False
        search_index = 0
        # assumes language_matrix is indexed (symbol, goal index, position)
        # -- TODO confirm
        column = self.language_matrix[:, index, :]
        # Flat indices of the slice ordered from largest to smallest value.
        sorted_column = column.flatten().argsort()[::-1]
        while not pos_found:
            # Map the flat index back to (symbol row, position column).
            sym_index, pos = unravel_index(sorted_column[search_index], column.shape)
            if pos not in positions:
                positions.append(pos)
                # Positions beyond the number of goal indices fall back to 0.
                position = pos if pos < len(goal_indices) else 0
                symbols[position] = sym_index
                pos_found = True
            else:
                # Position already taken: replace its symbol if this slice
                # rates the new symbol higher, then keep searching.
                # NOTE(review): 'pos' is not clamped here as it is in the
                # branch above, and symbols[pos] may still be None -- confirm
                # this cannot happen for valid matrices.
                contained_sym = symbols[pos]
                contained_value = column[contained_sym, pos]
                new_value = column[sym_index, pos]
                if new_value > contained_value:
                    symbols[pos] = sym_index
            search_index += 1
    symbs = [env.symbols[symbol] for symbol in symbols]
    return Sequence(symbs)
def test_grid_sequence2(self):
    """After placing a piece at index 11, all ten grid sequences match."""
    start_layout = [
        [piece.PFTTT, None, piece.PTFFT, piece.PTTFT],
        [None, None, None, None],
        [None, piece.PTTTF, None, None],
        [piece.PTTTT, None, None, None],
    ]
    grid_after_move = Grid(start_layout).place(piece.PFFTF, 11)

    # Expected contents of sequences 0..9 of the resulting grid.
    expected_by_index = {
        0: [piece.PFTTT, None, piece.PTTTT],
        1: [piece.PTTTF],
        2: [piece.PTFFT],
        3: [piece.PTTFT, piece.PFFTF],
        4: [piece.PFTTT, piece.PTFFT, piece.PTTFT],
        5: [None, None, None, None],
        6: [piece.PTTTF, piece.PFFTF],
        7: [piece.PTTTT],
        8: [piece.PFTTT],
        9: [piece.PTTTT, piece.PTTTF, piece.PTTFT],
    }
    for seq_index in range(10):
        wanted = Sequence(expected_by_index[seq_index])
        self.assertEqual(wanted, grid_after_move.getSeq(seq_index),
                         "Unexpected Sequence " + str(seq_index))
def do_POST(self):
    """Handle POSTs to /set/sequence/ and /set/playlist/.

    The body is read and parsed before any status line is written, so a
    malformed request gets a clean 500 instead of a 500 appended to a 200
    response that was already started. The 200 is still sent before the
    player runs, so the client is not kept waiting on playback.

    Errors are printed. They are re-raised only when ``cfg.VERBOSE_LOGGING``
    is set.
    """
    responded = False
    try:
        if not self.check_auth(True):
            return

        content_length = int(self.headers['Content-Length'])
        post_string = self.rfile.read(content_length).decode("utf-8")

        # Parse first: a parse failure must surface before we commit to 200.
        seq = None
        playlist = None
        if self.path == "/set/sequence/":
            seq = Sequence.parsestring(post_string)
        elif self.path == "/set/playlist/":
            playlist = SequencePlaylist.parsestring(post_string)

        # From here on a status line is on the wire; never send another.
        responded = True
        self.send_response(200)
        self.send_header('Content-type', "text/html")
        self.end_headers()
        self.wfile.write(b"")

        if seq is not None:
            PLAYER.runsequence(seq)
        elif playlist is not None:
            PLAYER.runplaylist(playlist)
    except Exception as ex:
        print("ERROR: {0}".format(ex))
        if not responded:
            self.send_response(500)
            self.end_headers()
        if cfg.VERBOSE_LOGGING:
            # Bare raise keeps the original traceback.
            raise
def main():
    """Print two demo results built with the Sequence query pipeline."""
    # Values from the Fibonacci generator divisible by 3; even ones are
    # squared, and the first five results are printed.
    fib_query = Sequence(fibonacci_generator())
    fib_query = fib_query.where(lambda x: x % 3 == 0)
    fib_query = fib_query.select(lambda x: x if x % 2 else x ** 2)
    print(fib_query.take(5).to_list())

    # NOTE(review): 'text.txt' is opened but never read; the word count runs
    # on the hard-coded line below -- confirm whether the file was meant to
    # be the input.
    with open('text.txt', 'r') as text:
        words = Sequence(["a b d b b bs b sb sb sa bsa "])
        words = words.select(lambda line: line.split()).flatten()
        counts = words.group_by(lambda x: x)
        counts = counts.select(lambda x: (x[0], len(x[1])))
        # Most frequent first, at most 50 entries.
        ranked = counts.order_by(lambda x: -x[1]).take(50)
        print(ranked.to_list())
def test_equality1(self):
    """Equality ignores empty slots but respects the order of the pieces."""
    with_gaps_a = Sequence([piece.PTTTF, None, None, piece.PTTTT])
    with_gaps_b = Sequence([piece.PTTTF, None, None, piece.PTTTT])
    self.assertEqual(with_gaps_a, with_gaps_b, "Equality expected 1.")
    self.assertTrue(with_gaps_a == with_gaps_b, "Equality expected 2.")
    self.assertFalse(with_gaps_a != with_gaps_b, "Equality expected 3.")

    # Same pieces in the same order, without the empty slots.
    compact = Sequence([piece.PTTTF, piece.PTTTT])
    self.assertEqual(with_gaps_a, compact, "Equality expected 4.")
    self.assertEqual(with_gaps_b, compact, "Equality expected 5.")

    # Same two pieces in the opposite order: must compare as different.
    swapped = Sequence([piece.PTTTT, piece.PTTTF])
    self.assertNotEqual(with_gaps_a, swapped, "Inequality expected 1.")
    self.assertTrue(with_gaps_a != swapped, "Inequality expected 2.")
    self.assertFalse(with_gaps_a == swapped, "Inequality expected 3.")
def admin_notice(cls, sid, start, end, content):
    """Create and save a login-panel notice.

    A new notice id is generated for server ``sid`` and the notice is saved
    through ``cls.save`` under ``NOTICE_TYPE_1``.
    """
    notice_id = Sequence.generate_notice_id(sid)
    notice = {
        "notice_type": NOTICE_TYPE_1,
        "start": start,
        "end": end,
        "content": content,
        "id": notice_id,
    }
    cls.save(sid, notice_id, NOTICE_TYPE_1, notice)
def get_sequence(self, goal, env):
    """Return a one-symbol Sequence for ``goal``.

    The symbol is the row holding the largest value in the goal's column of
    ``self.language_matrix``. When several rows share that maximum, one of
    them is picked with ``choice``.
    """
    goal_index = env.goals.index(goal)
    column = self.language_matrix[:, goal_index]
    best_rows = argwhere(column == column.max())
    if len(best_rows) > 1:
        # Tie between several rows: let choice() pick one of them.
        symbol_index = choice(squeeze(best_rows))
    else:
        symbol_index = argmax(column)
    return Sequence([env.symbols[symbol_index]])
def freq_pr_corr(self):
    """Plot 3-grams whose frequency correlates negatively with user rank.

    For every 3-gram, ``stats.pearsonr`` is computed between the rank
    position (1, 2, 3, ...) of each user in the ranking for
    ``self.log.end_date`` and that user's count of the 3-gram. Grams with a
    coefficient below -0.5 and a p-value below 0.001 are plotted, using the
    joined gram as the tick label.

    The ranking and every gram's correlation are printed. Output uses single
    pre-formatted strings so it is the same on Python 2 and Python 3.
    """
    s = Sequence()
    kgram_list, kgram_count = s.create_k_grams(3)
    rank = s.get_weekly_ranks()[self.log.end_date]
    print(rank)

    scorr = {}
    for kgram in kgram_list:
        frequencies = [kgram_count[user][kgram] for user in rank]
        # Rank positions are 1-based and line up with 'frequencies'.
        positions = list(range(1, len(frequencies) + 1))
        scorr[kgram] = stats.pearsonr(positions, frequencies)

    x = []
    y = []
    for i, kgram in enumerate(kgram_list):
        print("%s %s" % (kgram, scorr[kgram]))
        coefficient, p_value = scorr[kgram][0], scorr[kgram][1]
        if coefficient < -0.5 and p_value < 0.001:
            x.append(i)
            y.append(coefficient)

    labels = [''.join(list(kgram_list[i])) for i in x]
    plt.xticks(x, labels)
    plt.plot(x, y)
    plt.show()
def test_is_fibonacci(self):
    """is_fibonacci accepts a Fibonacci run and rejects the other inputs."""
    matching = ('1 1 2 3 5 8',)
    non_matching = ('2 4 6 8 10', '1 3 5 7 9', '1 4 10 15')

    for raw in matching:
        self.assertTrue(Sequence(raw_str=raw).is_fibonacci())
    for raw in non_matching:
        self.assertFalse(Sequence(raw_str=raw).is_fibonacci())
def getScoreDictionary(self, seq):
    """Score every window of ``self.length`` symbols in ``seq``.

    Returns a dict mapping each window's start index to the value of
    ``self.getSequenceScore`` for that window.
    """
    wrapped = Sequence(seq)
    # NOTE(review): start indices stop before getLength() - self.length, so
    # the window starting exactly there is not scored -- confirm whether
    # that is intended given subseq()'s end-index convention.
    start_count = wrapped.getLength() - self.length
    return {
        start: self.getSequenceScore(
            wrapped.subseq(start, start + self.length).getSequence())
        for start in range(0, start_count)
    }
def main():
    """Run the trend simulation over many trials and print the averages."""
    years = 70  # the Biblical "three score and ten"
    days = 365 * years
    trials = 100

    # One (forward_trends, backward_trends) pair per simulated life.
    outcomes = [buck_the_trend(Sequence(days)) for _ in range(trials)]
    mean_forward_trends = sum(fwd for fwd, _ in outcomes) / trials
    mean_backward_trends = sum(bkwd for _, bkwd in outcomes) / trials

    print("Average number of upward trends in a random life: ",
          mean_forward_trends)
    print("Average number of upward trends in a pessimal life: ",
          mean_backward_trends)
    print("Average upward trend length in a random life: {} years".format(
        years / mean_forward_trends))
    print("Average upward trend length in a pessimal life: {} years".format(
        years / mean_backward_trends))
def test_is_square_seq(self):
    """is_square_seq accepts runs of squares and rejects the other inputs."""
    matching = ('1 4 9 16', '1 9 25')
    non_matching = ('2 4 6 8 10', '1 3 5 7 9', '1 4 10 15')

    for raw in matching:
        self.assertTrue(Sequence(raw_str=raw).is_square_seq())
    for raw in non_matching:
        self.assertFalse(Sequence(raw_str=raw).is_square_seq())
def admin_broadcast(cls, sid, start, end, interval, msg):
    """Create and save a system marquee (scrolling ticker) broadcast.

    A new notice id is generated for server ``sid`` and the broadcast is
    saved through ``cls.save`` under ``NOTICE_TYPE_2``.

    start, end -- stored in the notice as given.
    interval -- stored in the notice as given.
    msg -- stored as the notice's "content".
    """
    # The local is named notice_id to avoid shadowing the builtin id().
    notice_id = Sequence.generate_notice_id(sid)
    notice = {
        "notice_type": NOTICE_TYPE_2,
        "start": start,
        "end": end,
        "interval": interval,
        "content": msg,
        "id": notice_id,
    }
    cls.save(sid, notice_id, NOTICE_TYPE_2, notice)
def init_child(parent, child):
    """Normalise ``child`` into a step and hook its events up to ``parent``.

    None becomes an empty Sequence, a BaseStep instance is used as is, and
    anything else is wrapped in a Sequence. The child's "all" event is then
    bound to ``parent._bubbleEvent`` and the child is returned.

    Raises Error when wrapping ``child`` in a Sequence raises TypeError.
    """
    from sequence import Sequence

    if child is None:
        child = Sequence([])
    elif not isinstance(child, BaseStep):
        try:
            child = Sequence(child)
        except TypeError:
            raise Error(
                "Argument must be an instance of Step or a list of Steps")

    child.on("all", parent._bubbleEvent)
    return child
def test_get_nucleotides(self):
    """The nucleotides attribute returns the string given at construction."""
    expected = "GATTACCA"
    # Constructing the object passes the string to Sequence.__init__.
    built = Sequence(expected)
    self.assertEqual(expected, built.nucleotides,
                     msg="Nucleotides returned were not those given")
def test_is_geometric(self):
    """is_geometric accepts geometric runs and rejects the other inputs."""
    # (raw input, expected verdict)
    expectations = (
        ('2 4 8 16', True),
        ('4 12 36', True),
        ('2 4 6 8 10', False),
        ('1 3 5 7 9', False),
        ('1 1 2 3 5 8', False),
    )
    for raw, should_match in expectations:
        verdict = Sequence(raw_str=raw).is_geometric()
        if should_match:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)
def test_is_arithmetic(self):
    """is_arithmetic accepts arithmetic runs and rejects the other inputs."""
    # (raw input, expected verdict)
    expectations = (
        ('2 4 6 8 10', True),
        ('1 3 5 7 9', True),
        ('5 10', True),
        ('2 4 8 16', False),
        ('1 1 2 3 5 8', False),
    )
    for raw, should_match in expectations:
        verdict = Sequence(raw_str=raw).is_arithmetic()
        if should_match:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)
def send(cls, sid, ctype, msg, sender, group_id=None, to_who=None):
    """Post a chat message to the channel selected by ``ctype``.

    The message text is passed through ``gfw.replace`` and stored pickled in
    redis together with its send time:

    * notice channel -- pushed onto the notice list, trimmed to
      ``CHAT_NOTICE_MAX_NUM`` entries.
    * world channel -- pushed onto the world list, trimmed to
      ``CHAT_WORLD_MAX_NUM`` entries.
    * group channel -- appended to the group's box, keeping only the last
      ``CHAT_GROUP_MAX_NUM`` messages.
    * private channel -- stored once in the shared private pool under a new
      id, and that id is appended to the boxes of both sender and receiver.

    For every channel except notice, ``sender`` is updated in place with its
    "group_name" and attached to the message. Other ``ctype`` values store
    nothing.
    """
    chat_msg = {
        "send_time": int(time.time()),
        # Sensitive words are filtered before the text is stored.
        "msg": gfw.replace(msg),
    }

    if ctype == CHAT_CHANNEL_NOTICE:
        notice_key = rediskey_config.CHAT_NOTICE_BOX_KEY % sid
        redis_client.lpush(notice_key, pickle.dumps(chat_msg))
        if redis_client.llen(notice_key) > CHAT_NOTICE_MAX_NUM:
            redis_client.ltrim(notice_key, 0, CHAT_NOTICE_MAX_NUM - 1)
        return

    sender["group_name"] = GroupService.get_name_by_id(sid, group_id)
    chat_msg["sender"] = sender

    if ctype == CHAT_CHANNEL_WORLD:
        world_key = rediskey_config.CHAT_WORLD_BOX_KEY % sid
        redis_client.lpush(world_key, pickle.dumps(chat_msg))
        if redis_client.llen(world_key) > CHAT_WORLD_MAX_NUM:
            redis_client.ltrim(world_key, 0, CHAT_WORLD_MAX_NUM - 1)

    elif ctype == CHAT_CHANNEL_GROUP:
        group_key = rediskey_config.CHAT_GROUP_BOX_KEY % sid
        stored_box = redis_client.hget(group_key, group_id)
        if stored_box is None:
            messages = [chat_msg]
        else:
            messages = pickle.loads(stored_box)
            messages.append(chat_msg)
            if len(messages) > CHAT_GROUP_MAX_NUM:
                # Keep only the most recent messages.
                messages = messages[-CHAT_GROUP_MAX_NUM:]
        redis_client.hset(group_key, group_id, pickle.dumps(messages))

    elif ctype == CHAT_CHANNEL_PRIVATE:
        user_info = user_logic.fetch_user_info(to_who)
        chat_msg["receiver"] = user_info["name"] if user_info else ""

        # The message body lives once in a shared pool. Sender and receiver
        # each remember only its id and fetch the message by that id.
        msg_id = Sequence.generate_chat_private_id(sid)
        redis_client.hset(rediskey_config.CHAT_PRIVATE_POOL_KEY % sid,
                          msg_id, pickle.dumps(chat_msg))

        for uid in (sender["uid"], to_who):
            stored_ids = redis_client.hget(
                rediskey_config.CHAT_PRIVATE_BOX_KEY % sid, uid)
            if stored_ids is None:
                msg_ids = [msg_id]
            else:
                msg_ids = pickle.loads(stored_ids)
                msg_ids.append(msg_id)
            redis_client.hset(rediskey_config.CHAT_PRIVATE_BOX_KEY % sid,
                              uid, pickle.dumps(msg_ids))
def create_cache(self, state_count):
    """
    Create a new zipped sequence optimized for the specified number of
    states in the hidden Markov model and cache the new sequence in the
    directory managed by this instance.

    state_count -- The optimal number of states in the hidden Markov model.

    Returns the newly created zipped sequence.
    """
    assert state_count > 0

    #
    # Create the directory of zipped sequences if it does not yet exist.
    #
    if not os.path.isdir(self.__state_count_to_sequence_path):
        os.mkdir(self.__state_count_to_sequence_path)

    #
    # Reread the original sequence, and zip it, optimized for the specified
    # state count.
    #
    seq = Sequence.from_file(
        self.__original_sequence_path, self.__unzipped_alphabet_size)
    x_seq = ZipSequence.from_sequence(seq, state_count)

    #
    # Loop over the new symbols of the zipped sequence, i.e. those at or
    # above the original alphabet size. A symbol already present in the
    # substitution table must map to the same pair; any other symbol is
    # added to the substitution table of this instance. Plain range() over
    # just the new symbols behaves the same on Python 2 and Python 3.
    #
    for symbol in range(self.__unzipped_alphabet_size, x_seq.alphabet_size):
        new_a, new_b = x_seq.get_substitute(symbol)
        if symbol in self.__substitutions:
            old_a, old_b = self.__substitutions[symbol]
            assert old_a == new_a
            assert old_b == new_b
            continue
        self.__substitutions[symbol] = (new_a, new_b)

    #
    # Record the number of symbols used for this state count, save the new
    # data structure, save the zipped sequence, and return the sequence.
    #
    self.__zipped_alphabet_sizes[state_count] = x_seq.alphabet_size
    self.__save_data_structure()
    x_seq_path = self.__resolve_zipped_sequence_path(state_count)
    x_seq.save(x_seq_path)
    return x_seq
def __init__(self, filename):
    """Read a FastA file and initialise the Sequence base with its records.

    Each line starting with '>' begins a record; its id is the header text
    up to the first whitespace (an empty string for a bare '>' line). Lines
    starting with ';' are comments and are ignored. All other lines have
    their whitespace removed and are appended to the current record.

    Prints a message and exits with status 1 when the file cannot be opened
    or when a line that is not a header appears before the first header.

    filename -- path of the FastA file to read.
    """
    # Open the FastA file. Only I/O failures are reported here, so
    # KeyboardInterrupt and programming errors are no longer swallowed.
    try:
        fasta_file = open(filename, "r")
    except IOError as err:
        print("cannot open file: '" + filename + "': " + str(err))
        sys.exit(1)

    ids = []
    seq = []
    whitespace = re.compile(r"\s+")

    # 'with' closes the file on every path, including the early exit below.
    with fasta_file:
        for line in fasta_file:
            if not ids and not line.startswith(">"):
                print("input file is not in FastA format")
                sys.exit(1)
            line = line.rstrip()
            if line.startswith(">"):
                # Sequence id: everything up to the first whitespace.
                fields = line.lstrip(">").split(None, 1)
                ids.append(str(fields[0]) if fields else "")
                seq.append("")
            elif line.startswith(";"):
                # Ignore comment lines.
                continue
            else:
                # Append the sequence data with all whitespace removed.
                seq[-1] += whitespace.sub("", line)

    # Construct the base object.
    Sequence.__init__(self, ids, seq)
    self.__filename = filename
def send(uid, mail):
    """Send a mail to player ``uid``.

    ``mail`` is updated in place with a new "mail_id", the creation time and
    "read" set to 0, then stored pickled in the player's new-mail hash. Only
    the last fifty mail ids (in sorted order) are kept; older entries are
    deleted.

    Any redis error propagates to the caller with its traceback intact. The
    former ``try/except ... raise e`` wrapper only re-raised the exception
    and used Python-2-only syntax, so it has been removed.
    """
    # The server id is the player id without its last eight characters.
    new_mail_id = Sequence.generate_mail_id(uid[:-8])
    mail.update({"mail_id": new_mail_id, "create": time.time(), "read": 0})

    new_mail_box_key = rediskey_config.MAIL_REPERTORY_NEW_PREFIX % uid
    redis_client.hset(new_mail_box_key, new_mail_id, pickle.dumps(mail))

    # Keep the fifty newest mails and delete the rest.
    # NOTE(review): ids are sorted as returned by hkeys(); if they are
    # numeric strings of different lengths this ordering is lexicographic,
    # not numeric -- confirm against generate_mail_id.
    new_mails = redis_client.hkeys(new_mail_box_key)
    new_mails.sort()
    for expired_mail_id in new_mails[:-50]:
        redis_client.hdel(new_mail_box_key, expired_mail_id)
def __init__(self, directory):
    """
    Initialize a new instance of the ZipDirectory class based on the
    specified directory. Require that the directory contains at least the
    text file, 'original_sequence', which contains the symbols of the
    original sequence in order and separated by whitespace.

    If the 'data_structure' file is missing, it is created from the original
    sequence. Otherwise it is read and validated with assertions.

    directory -- The directory to manage.
    """
    assert os.path.isdir(directory)

    #
    # All ZipHMM directories must contain the 'original_sequence' file.
    #
    self.__path = directory
    assert os.path.isfile(self.__original_sequence_path)

    #
    # If the 'data_structure' file does not exist, this is an uninitialized
    # directory; read the original sequence and initially store the data
    # structure.
    #
    if not os.path.isfile(self.__data_structure_path):
        seq = Sequence.from_file(self.__original_sequence_path)
        self.__unzipped_alphabet_size = seq.alphabet_size
        self.__unzipped_sequence_length = len(seq)
        self.__zipped_alphabet_sizes = dict()
        self.__substitutions = dict()
        self.__save_data_structure()
        return

    #
    # Read the original alphabet size; only 16-bit values are allowed, and
    # there must be at least one symbol in the alphabet.
    #
    scanner = TokenScanner(self.__data_structure_path)
    scanner.require('orig_alphabet_size')
    self.__unzipped_alphabet_size = scanner.read_int()
    assert 1 <= self.__unzipped_alphabet_size < 65536

    #
    # Read the original sequence length and assume this still matches the
    # data in the original sequence itself.
    #
    scanner.require('orig_seq_length')
    self.__unzipped_sequence_length = scanner.read_int()
    assert 1 <= self.__unzipped_sequence_length

    #
    # Read the number of symbols compressed for various state counts; this
    # will also define the maximum symbol value.
    #
    scanner.require('nStates2alphabet_size')
    self.__zipped_alphabet_sizes = dict()
    max_alphabet_size = 0
    while True:
        token = scanner.peek()
        if token is None or token == 'symbol2pair':
            break

        #
        # Read the state count; duplicate entries are not allowed.
        #
        state_count = scanner.read_int()
        assert state_count > 0
        assert state_count not in self.__zipped_alphabet_sizes

        #
        # Read the alphabet size.
        #
        alphabet_size = scanner.read_int()
        assert alphabet_size > 0
        max_alphabet_size = max(alphabet_size, max_alphabet_size)

        #
        # Store the state count and alphabet size into the table used to
        # decompress zipped sequences.
        #
        self.__zipped_alphabet_sizes[state_count] = alphabet_size

    #
    # Read the substitution table.
    #
    self.__substitutions = dict()
    scanner.require('symbol2pair')
    while scanner.peek() is not None:
        #
        # Read the new symbol; disallow duplicate entries for new symbols,
        # ensure the new symbol is not in the original alphabet, and ensure
        # the new symbol is not larger than the largest recorded alphabet
        # size from the previous section.
        #
        new_symbol = scanner.read_int()
        assert new_symbol not in self.__substitutions
        assert new_symbol >= self.__unzipped_alphabet_size
        assert new_symbol < max_alphabet_size

        #
        # Read the two symbols; neither may be larger than the largest
        # recorded alphabet size.
        #
        a = scanner.read_int()
        b = scanner.read_int()
        assert a < max_alphabet_size
        assert b < max_alphabet_size

        #
        # Record the entry in the substitution table, new_symbol => (a, b).
        #
        self.__substitutions[new_symbol] = (a, b)

    #
    # Every a and b in the substitution table must either be from the
    # original alphabet or be present as a new symbol in the substitution
    # table. A generator over values() replaces the tuple-parameter lambda
    # and iteritems(), which are Python-2-only.
    #
    original_size = self.__unzipped_alphabet_size
    substitutions = self.__substitutions
    assert all(
        (a_ < original_size or a_ in substitutions) and
        (b_ < original_size or b_ in substitutions)
        for a_, b_ in substitutions.values())