def connect_all(matrix, need_log=False):
    """
    introduction: Prepend its row index to every row of a two-dimensional matrix.

    :param matrix: Data from input.
                   Type: Two-dimensional list(int).

    :param need_log: When true, write a log entry and report per-row progress.

    :return new_matrix: One entry per input row, each produced by
                        connect(row_index, row_data, index_binary_length).
    """
    m = monitor.Monitor()
    row_total = len(matrix)
    # Number of binary digits needed to write the row count (the "0b" prefix is not counted).
    index_binary_length = len(bin(row_total)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Add index in the binary matrix.")

    new_matrix = []
    for row_index, row_data in enumerate(matrix):
        if need_log:
            m.output(row_index, row_total)
        new_matrix.append(connect(row_index, row_data, index_binary_length))

    m.restore()
    return new_matrix
def divide_all(matrix, need_log=False):
    """
    introduction: Split every row of the matrix into its index part and its data part.

    :param matrix: Rows that carry an index followed by data.
                   Type: Two-dimensional list(int).

    :param need_log: When true, write a log entry and report per-row progress.

    :returns indexs, datas: The index of each row and the data of each row, in input order.
                            Each pair is produced by divide(row, index_binary_length).
    """
    m = monitor.Monitor()
    row_total = len(matrix)
    # Number of binary digits needed to write the row count (the "0b" prefix is not counted).
    index_binary_length = len(bin(row_total)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Divide index and data from binary matrix.")

    indexs, datas = [], []
    for position, segment in enumerate(matrix):
        if need_log:
            m.output(position, row_total)
        row_index, row_data = divide(segment, index_binary_length)
        indexs.append(row_index)
        datas.append(row_data)

    m.restore()
    return indexs, datas
def write_dna_file(path, dna_sequences, need_log=False):
    """
    introduction: Write a set of DNA sequences to a text file, one sequence per line.

    :param path: File path.
                 Type: string

    :param dna_sequences: Generated DNA sequences; each one is joined into a string before writing.

    :param need_log: When true, write a log entry and report per-sequence progress.

    :return: dna_sequences unchanged on success; None when an IOError was logged.
    """
    m = monitor.Monitor()

    try:
        with open(path, "w") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write DNA sequences to file: " + path)

            sequence_total = len(dna_sequences)
            for position, sequence in enumerate(dna_sequences):
                if need_log:
                    m.output(position, sequence_total)
                file.write("".join(sequence) + "\n")

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def sort_order(indexes, data_set, need_log=False):
    """
    introduction: Restore data in order of index.

    :param indexes: The indexes of data set; indexes[i] is the destination row of data_set[i].

    :param data_set: The disordered data set, the locations of this are corresponding to parameter "indexes".

    :param need_log: need output log.

    :returns matrix: Rows of data_set placed at their destination index.
                     The matrix has len(indexes) rows; rows that no valid index points at stay zero-filled.
                     Type: Two-dimensional list(int).
    """
    m = monitor.Monitor()

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Restore data order according to index.")

    row_total = len(indexes)
    # The width is only needed when there is at least one row to pre-fill.
    width = len(data_set[0]) if row_total else 0
    matrix = [[0 for _ in range(width)] for _ in range(row_total)]

    for row, target in enumerate(indexes):
        if need_log:
            m.output(row, row_total)
        # Validate the destination index, not the loop counter: an index outside the matrix
        # is skipped instead of raising IndexError (too large) or wrapping to the end (negative).
        if 0 <= target < row_total:
            matrix[target] = data_set[row]

    m.restore()
    return matrix
def get_yyc_rule_by_index(index, need_logs=False):
    """
    introduction: Enumerate every candidate rule and return the information of the one at position index.

    Candidates are all combinations of a base in ("A", "T", "C", "G"), a 4-bit list (rule 1)
    and a 4x4 bit matrix (rule 2); only the combinations accepted by _check are kept,
    numbered in enumeration order.

    :param index: Position of the wanted rule among the accepted rules.

    :param need_logs: When true, print messages and report enumeration progress.

    :return: The result of get_info() of the selected rule.

    :raises ValueError: If index is negative or not smaller than the number of accepted rules.
    """
    rule1_patterns = ["".join(bits) for bits in itertools.product("01", repeat=4)]
    rule2_patterns = ["".join(bits) for bits in itertools.product("01", repeat=16)]

    m = monitor.Monitor()

    if need_logs:
        print("Find all the available Yin-Yang rules.")

    rules = []
    step = 0
    for base in ("A", "T", "C", "G"):
        for rule1_pattern in rule1_patterns:
            for rule2_pattern in rule2_patterns:
                # Fresh lists are built for every candidate so that kept rules never share state.
                rule1 = [int(bit) for bit in rule1_pattern]
                rule2 = numpy.array([int(bit) for bit in rule2_pattern]).reshape(4, 4).tolist()

                if _check(rule1, rule2):
                    # The rule number is its position among the accepted rules.
                    rules.append(YYCRule(rule1, rule2, base, len(rules)))

                step += 1
                if need_logs:
                    m.output(step, len(rule1_patterns) * len(rule2_patterns) * 4)

    if not 0 <= index < len(rules):
        raise ValueError("We have " + str(len(rules)) + " rules, index " + str(index) + " is wrong!")

    if need_logs:
        print("Current Rule is " + str(rules[index].get_info()) + ".")

    return rules[index].get_info()
def read_dna_file(path, need_log=False):
    """
    introduction: Reading DNA sequence set from documents.

    :param path: File path.
                 Type: string

    :param need_log: need output log.

    :return dna_sequences: One list of characters per line of the file, without the line break.
                           None is returned when an IOError was logged.
    """
    m = monitor.Monitor()

    dna_sequences = []

    try:
        with open(path, "r") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read DNA sequences from file: " + path)

            # Read current file by line
            lines = file.readlines()
            for index, line in enumerate(lines):
                if need_log:
                    m.output(index, len(lines))
                # Drop the line break only when it is present: the final line of a file may
                # lack one, and unconditionally cutting the last character would lose a base.
                if line.endswith("\n"):
                    line = line[:-1]
                dna_sequences.append(list(line))

        return dna_sequences
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def __init__(self, homopolymer=4, gc_content=0.2, redundancy=0.5, c_dist=0.1, delta=0.5,
             header_size=4, recursion_depth=10000000, decode_packets=None):
    """
    introduction: The initialization method of FC.

    :param homopolymer: maximum length of homopolymer, type = int.
    :param gc_content: the fraction of gc content above/below 0.5 (0.1 means 0.4-0.6).
    :param redundancy: artificial redundancy for decode successfully (0.5 generate 50% more fragments), type = float.
    :param c_dist: Degree distribution tuning parameter, type = float.
    :param delta: Degree distribution tuning parameter, type = float.
    :param header_size: number of bytes for the header, type = int.
    :param recursion_depth: stored as given; its use is not visible in this method.
    :param decode_packets: bit segments in the encoding process, type = int.
    """
    # Sequence constraints.
    self.homopolymer = homopolymer
    self.gc_content = gc_content

    # Coding parameters.
    self.redundancy = redundancy
    self.header_size = header_size
    self.recursion_depth = recursion_depth

    # Degree distribution tuning.
    self.c_dist = c_dist
    self.delta = delta

    self.decode_packets = decode_packets

    # Validation runs once every constructor argument is stored.
    self._init_check()

    # Runtime state, filled in later.
    self.prng = None
    self.file_size = 0
    self.monitor = monitor.Monitor()
def read_binary_from_all(path, segment_length=120, need_log=False):
    """
    introduction: Read a file as bits and lay them out as fixed-length binary segments.

    :param path: File path.
                 Type: string

    :param segment_length: The binary segment length used for DNA sequence generation.
                           Considering current DNA synthesis technique limitation,
                           we usually set 120 as default segment length.

    :param need_log: When true, write log entries and report per-byte progress.

    :return matrix, size: A matrix in which each row is one binary segment (the last row is
                          zero-padded), together with the file size in bytes.
                          None is returned when an IOError was logged.
    """
    m = monitor.Monitor()

    try:
        # Open selected file
        with open(path, mode="rb") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read binary matrix from file: " + path)

            size = os.path.getsize(path)

            # Zero-filled storage large enough for every bit of the file.
            segment_total = math.ceil(size * 8 / segment_length)
            matrix = [[0] * segment_length for _ in range(segment_total)]

            bit_position = 0
            for byte_index in range(size):
                if need_log:
                    m.output(byte_index, size)

                # Read the file one byte at a time and emit its bits, most significant first.
                value = struct.unpack("B", file.read(1))[0]
                for shift in range(7, -1, -1):
                    row, col = divmod(bit_position, segment_length)
                    matrix[row][col] = (value >> shift) & 1
                    bit_position += 1

            index_length = len(bin(len(matrix))) - 2
            if index_length * 7 > segment_length and need_log:
                log.output(log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
                           "The proportion of index in whole sequence may be high. \n"
                           "It is recommended to increase the length of output DNA sequences "
                           "or to divide the file into more segment pools")

        return matrix, size
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def __init__(self, fixed_huffman=True):
    """
    introduction: The initialization method of Huffman Codec.

    :param fixed_huffman: Declare whether to use the Huffman dictionary in Goldman's paper.
                          In order to reduce the possible loss of function storage,
                          we recommend using this dictionary.
    """
    # Configuration chosen by the caller.
    self.fixed_huffman = fixed_huffman

    # State that starts empty.
    self.huffman_tree = None
    self.segment_length = 0
    self.file_size = 0

    self.m = monitor.Monitor()
def __init__(self, mapping_rule=None):
    """
    introduction: The initialization method of Simple Codec.

    :param mapping_rule: Mapping between bases and numbers.
                         There can be two settings:
                         (1) Two bases correspond to a number (0 or 1): i.e. AT-0, CG-1.
                         (2) Each base corresponds to a number: i.e. A-00, T-01, C-10, G-11.
                         A missing or empty rule falls back to [0, 1, 1, 0].
    """
    self.mapping_rule = mapping_rule if mapping_rule else [0, 1, 1, 0]

    # Validation runs once the rule is stored.
    self._init_check()

    self.file_size = 0
    self.m = monitor.Monitor()
def write_all_from_binary(path, matrix, size, need_log=False):
    """
    introduction: Writing binary matrix to document.

    :param path: File path.
                 Type: string

    :param matrix: A matrix in which each row represents a binary segment.
                   Every row is read up to the length of the first row.
                   Type: two-dimensional list(int)

    :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                 At most this many bytes are written.
                 Type: int

    :param need_log: choose to output log file or not.
    """
    m = monitor.Monitor()

    try:
        with open(path, "wb+") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write file from binary matrix: " + path)

            # Change bit to byte (8 -> 1), and write a file as bytes
            remaining = size
            bit_index = 0
            temp_byte = 0
            for row in range(len(matrix)):
                if need_log:
                    m.output(row, len(matrix))
                for col in range(len(matrix[0])):
                    bit_index += 1
                    temp_byte = temp_byte * 2 + matrix[row][col]
                    if bit_index == 8:
                        # Strictly positive: "remaining >= 0" would let one padding byte
                        # through, making the output one byte longer than the declared size.
                        if remaining > 0:
                            file.write(struct.pack("B", int(temp_byte)))
                        bit_index = 0
                        temp_byte = 0
                        remaining -= 1
    except IOError:
        log.output(log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
                   "The file selection operation was not performed correctly. Please execute the operation again!")
def __init__(self, base_values=None, need_log=False):
    """
    introduction: The initialization method of Grass Codec.

    :param base_values: Values assigned to the entries of inherent.gc_codes, position by position.
                        Defaults to 0-47 in order. Only entries whose value lies in 0-46 are kept;
                        any other value, and its corresponding triplet, is discarded.

    :param need_log: Accepted but not used by this method.
    """
    if base_values is None:
        base_values = list(range(48))

    # Pair each accepted value with the code found at the same position.
    kept_pairs = [(inherent.gc_codes[position], value)
                  for position, value in enumerate(base_values)
                  if 0 <= value < 47]

    self.mapping_rule = [[code for code, _ in kept_pairs], [value for _, value in kept_pairs]]

    self.segment_length = 0
    self.file_size = 0
    self.m = monitor.Monitor()