def write_dna_file(path, dna_sequences, need_log=False):
    """
    introduction: Save a set of DNA sequences to a text file, one sequence per line.

    :param path: Destination file path.
                  Type: string

    :param dna_sequences: Sequences to store; every entry is joined with "" before writing.
                           Type: one-dimensional list(string)

    :param need_log: Emit a log message and per-sequence progress output when True.

    :return dna_sequences: The sequences that were passed in. When an IOError is
                           caught, the error is logged and None is returned implicitly.
    """
    progress = monitor.Monitor()

    try:
        with open(path, "w") as handle:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write DNA sequences to file: " + path)

            total = len(dna_sequences)
            for number, sequence in enumerate(dna_sequences, start=1):
                if need_log:
                    progress.output(number, total)
                handle.write("".join(sequence) + "\n")

        return dna_sequences
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
def get_yyc_rules(need_log=False):
    """
    introduction: Enumerate all candidate rule pairs and collect those accepted by _check.

    Every 4-bit pattern (rule 1) is combined with every 16-bit pattern
    (rule 2, reshaped to 4 x 4) for each of the bases "A", "T", "C", "G".
    Accepted combinations are wrapped in YYCRule objects that are numbered
    consecutively in the order in which they are found.

    :param need_log: Emit a log message and progress output when True.

    :return rules: The accepted rules.
                    Type: one-dimensional list(YYCRule)
    """
    rules = []
    rule1_patterns = ["".join(bits) for bits in itertools.product("01", repeat=4)]
    rule2_patterns = ["".join(bits) for bits in itertools.product("01", repeat=16)]

    progress = monitor.Monitor()

    if need_log:
        # noinspection PyProtectedMember
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Find all the available Yin-Yang rules.")

    total_steps = len(rule1_patterns) * len(rule2_patterns) * 4
    rule_number = 0
    step = 0
    # product() walks the combinations in the same order as three nested loops.
    for base, pattern1, pattern2 in itertools.product(["A", "T", "C", "G"], rule1_patterns, rule2_patterns):
        # Fresh lists are built for every combination so no two rules share state.
        rule1 = [int(bit) for bit in pattern1]
        rule2 = numpy.array([int(bit) for bit in pattern2]).reshape(4, 4).tolist()

        if _check(rule1, rule2):
            rules.append(YYCRule(rule1, rule2, base, rule_number))
            rule_number += 1

        step += 1
        if need_log:
            progress.output(step, total_steps)

    return rules
def divide_all(matrix, need_log=False):
    """
    introduction: Split every row of a binary matrix into its index and its data.

    :param matrix: Rows that each hold an index followed by data.
                    Type: Two-dimensional list(int).

    :param need_log: Emit a log message and per-row progress output when True.

    :returns indexs, datas: The index of every row and the data of every row,
                            in the same order as the rows of the matrix.
                            Type: One-dimensional list(int), Two-dimensional list(int).
    """
    progress = monitor.Monitor()
    row_total = len(matrix)
    # Number of binary digits needed to write the row count.
    index_binary_length = len(bin(row_total)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Divide index and data from binary matrix.")

    indexs, datas = [], []
    for number, segment in enumerate(matrix, start=1):
        if need_log:
            progress.output(number, row_total)
        index, data = divide(segment, index_binary_length)
        indexs.append(index)
        datas.append(data)

    progress.restore()

    return indexs, datas
def connect_all(matrix, need_log=False):
    """
    introduction: Attach the row number as index to every row of a binary matrix.

    :param matrix: Data rows without index.
                    Type: Two-dimensional list(int).

    :param need_log: Emit a log message and per-row progress output when True.

    :return new_matrix: One entry per input row, produced by connect() from the
                        zero-based row number and the row data.
                        Type: Two-dimensional list(int).
    """
    progress = monitor.Monitor()
    row_total = len(matrix)
    # Number of binary digits needed to write the row count.
    index_binary_length = len(bin(row_total)) - 2

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Add index in the binary matrix.")

    new_matrix = []
    for row, segment in enumerate(matrix):
        if need_log:
            progress.output(row + 1, row_total)
        new_matrix.append(connect(row, segment, index_binary_length))

    progress.restore()

    return new_matrix
def _sample_init(self, graph):
    '''
    Reset the sampler state for a new seed graph.

    - a new monitor object is created and the backtrack counter is reset
    - the seed is passed through the preprocessor, which yields the graph
      manager object used from here on
    - the size of the seed's base graph is remembered when
      max_core_size_diff is greater than -1
    - the graph manager is scored
    - the sample notes and the set of seen scores are cleared; when
      include_seed equals False the seed's score is registered immediately,
      so the seed itself is never reported (this matters when nothing
      happens during sampling)

    returns the graph manager built for the seed
    '''
    self.monitorobject = monitor.Monitor(self.monitor)
    self.backtrack = self.maxbacktrack
    self.last_graphman = None

    seed_manager = self.preprocessor.transform([graph])[0]
    base_graph = seed_manager.base_graph()
    if self.max_core_size_diff > -1:
        self.seed_size = len(base_graph)

    # presumably stores its result on seed_manager._score, which is read below
    self._score(seed_manager)
    self._sample_notes = ''
    self._sample_path_score_set = set()

    # the comparison with False is kept as is: only values equal to False qualify
    if self.include_seed == False:
        self._sample_path_score_set.add(seed_manager._score)

    return seed_manager
def sort_order(indexes, data_set, need_log=False):
    """
    introduction: Restore data in order of index.

    Row i of the result is the entry of data_set stored at the position where
    the value i first occurs in indexes. When the largest index exceeds the
    number of indexes, only the leading run of consecutive indexes is treated
    as required and everything else is reported as additional.

    :param indexes: The (zero-based) indexes of data set.
                     Type: One-dimensional list(int).

    :param data_set: The disordered data set, the locations of this are
                     corresponding to parameter "indexes".

    :param need_log: need output log.

    :returns matrix: Binary list in correct order.
                      Type: Two-dimensional list(int).

    :raises ValueError: if indexes is empty or a required index is missing.
    """
    # Progress reporting is only used when logging, so the monitor is only created then.
    m = monitor.Monitor() if need_log else None

    if need_log:
        log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                   "Restore data order according to index.")

    # Position of the first occurrence of every index value. This replaces the
    # repeated linear scans (``in`` / ``list.index``) with constant-time lookups
    # while keeping the first-occurrence semantics of ``list.index``.
    first_position = {}
    for position, value in enumerate(indexes):
        first_position.setdefault(value, position)

    # additional information checker
    # NOTE(review): with zero-based indexes a complete set has max == len - 1,
    # so a set with max == len is not detected here and ends in ValueError
    # below; confirm whether ">=" was intended.
    flag_index = 0
    if max(indexes) > len(indexes):
        while flag_index + 1 in first_position:
            flag_index += 1
        # index to length
        flag_index += 1

    if need_log and flag_index > 0:
        log.output(
            log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
            "There are " + str(flag_index) + " required bit segments and " +
            str(len(indexes) - flag_index) + " additional bit segments")

    row_total = flag_index if flag_index > 0 else len(indexes)

    # Rows are collected directly; a zero-filled placeholder matrix is not
    # needed because every row is assigned exactly once.
    matrix = []
    for index in range(row_total):
        try:
            position = first_position[index]
        except KeyError:
            # Same exception type that list.index() raised for a missing index.
            raise ValueError(str(index) + " is not in list") from None
        matrix.append(data_set[position])
        if need_log:
            m.output(index + 1, row_total)

    if need_log:
        m.restore()

    return matrix
def write_all_from_binary(path, matrix, size, need_log=False):
    """
    introduction: Writing binary matrix to document.

    The bits of the matrix are read row by row and packed into bytes, most
    significant bit first; a byte may span two rows. At most ``size`` bytes
    are written and bits that do not fill a complete byte are dropped.
    An IOError is logged instead of being raised.

    :param path: File path.
                  Type: string

    :param matrix: A matrix in which each row represents a binary segment that will be used for DNA sequence generation.
                    Every row is read up to the length of the first row.
                    Type: two-dimensional list(int)

    :param size: This refers to file size, to reduce redundant bits when transferring DNA to binary files.
                  Type: int

    :param need_log: choose to output log file or not.
    """
    # Progress reporting is only used when logging, so the monitor is only created then.
    m = monitor.Monitor() if need_log else None

    try:
        with open(path, "wb+") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Write file from binary matrix: " + path)

            row_total = len(matrix)
            width = len(matrix[0]) if row_total else 0

            # Change bit to byte (8 -> 1), and write a file as bytes
            bit_index = 0
            temp_byte = 0
            for row_number, row in enumerate(matrix, start=1):
                if need_log:
                    m.output(row_number, row_total)

                # The byte budget is used up: keep reporting progress, write nothing more.
                if size <= 0:
                    continue

                for col in range(width):
                    bit_index += 1
                    temp_byte = temp_byte * 2 + row[col]
                    if bit_index == 8:
                        file.write(struct.pack("B", int(temp_byte)))
                        bit_index = 0
                        temp_byte = 0
                        size -= 1
                        # Stop after exactly ``size`` bytes; the previous
                        # ``size >= 0`` test let one extra byte through.
                        if size <= 0:
                            break
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
def read_dna_file(path, need_log=False):
    """
    introduction: Reading DNA sequence set from documents.

    Every line of the file becomes one sequence, stored as a list of single
    characters without the line break. An empty line yields an empty list.
    An IOError is logged instead of being raised; None is returned in that case.

    :param path: File path.
                  Type: string

    :param need_log: need output log.

    :return dna_sequences: The sequences of the file, one entry per line.
                            Type: two-dimensional list(string), one character per element
    """
    # Progress reporting is only used when logging, so the monitor is only created then.
    m = monitor.Monitor() if need_log else None

    dna_sequences = []

    try:
        with open(path, "r") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read DNA sequences from file: " + path)

            # Read current file by line
            lines = file.readlines()

        line_total = len(lines)
        for number, line in enumerate(lines, start=1):
            if need_log:
                m.output(number, line_total)

            # Only remove a line break that is really there: the last line of a
            # file without trailing newline must keep its final base.
            if line.endswith("\n"):
                line = line[:-1]
            dna_sequences.append(list(line))

        return dna_sequences
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
def read_binary_from_all(path, segment_length=120, need_log=False):
    """
    introduction: Reading binary matrix from document.

    The file is read as bytes and every byte is expanded to 8 bits, most
    significant bit first. The bits fill the matrix row by row; unused
    positions of the last row stay 0. An IOError is logged instead of being
    raised; None is returned in that case.

    :param path: File path.
                  Type: string

    :param segment_length: The binary segment length used for DNA sequence generation.
                           Considering current DNA synthesis technique limitation,
                           we usually set 120 as default segment length.

    :param need_log: choose to output log file or not.

    :return matrix, size: A matrix in which each row represents a binary segment that will be used for
                          DNA sequence generation, and the number of bytes read from the file.
                          Type: two-dimensional list(int), int
    """
    # Progress reporting is only used when logging, so the monitor is only created then.
    m = monitor.Monitor() if need_log else None

    try:
        # Open selected file
        with open(path, mode="rb") as file:
            if need_log:
                log.output(log.NORMAL, str(__name__), str(sys._getframe().f_code.co_name),
                           "Read binary matrix from file: " + path)

            # One buffered read instead of one read() call per byte.
            content = file.read()

        size = len(content)

        # Set init storage matrix
        matrix = [[0] * segment_length for _ in range(math.ceil(size * 8 / segment_length))]

        row = 0
        col = 0
        for byte_number, byte in enumerate(content, start=1):
            if need_log:
                m.output(byte_number, size)

            # Most significant bit first.
            for shift in range(7, -1, -1):
                matrix[row][col] = (byte >> shift) & 1
                col += 1
                if col == segment_length:
                    col = 0
                    row += 1

        # Compares 7 times the number of binary digits of the row count with the segment length.
        if (len(bin(len(matrix))) - 2) * 7 > segment_length:
            if need_log:
                log.output(
                    log.WARN, str(__name__), str(sys._getframe().f_code.co_name),
                    "The proportion of index in whole sequence may be high. \n"
                    "It is recommended to increase the length of output DNA sequences "
                    "or to divide the file into more segment pools")

        return matrix, size
    except IOError:
        log.output(
            log.ERROR, str(__name__), str(sys._getframe().f_code.co_name),
            "The file selection operation was not performed correctly. Please execute the operation again!"
        )
def __init__(self, base_reference=None, current_code_matrix=None, support_bases=None, support_spacing=0,
             max_ratio=0.8, search_count=100, max_homopolymer=math.inf, max_content=1, min_free_energy=None):
    """
    introduction: Set up a YYC instance from its rules and screening thresholds.

    :param base_reference: Correspondence between base and binary data (RULE 1).
                            Two of the bases must map to 1 and the other two to 0,
                            so there are only 6 cases.
                            Default: [0, 1, 0, 1].

    :param current_code_matrix: Conversion rule between base and bits based on
                                the support base and the current base (RULE 2).
                                Rows are labelled by the support base, columns by the current base.
                                     A   T   C   G
                                 A   X1  Y1  X2  Y2
                                 T   X3  Y3  X4  Y4
                                 C   X5  Y5  X6  Y6
                                 G   X7  Y7  X8  Y8
                                Xn + Yn = 1 and Xn * Yn = 0 must hold for n in [1, 8].
                                NOTE(review): the table is the layout as originally documented;
                                which two columns form an X/Y pair presumably depends on
                                base_reference. Confirm against _init_check.

    :param support_bases: Bases placed before the official data.
                          The count of support bases must be more than the support spacing.
                          Default: [index_base[0]].

    :param support_spacing: Spacing between support base and current base.
                            When the support base directly precedes the current base, the spacing is 0.

    :param max_ratio: The max ratio of 0 or 1.
                      When (count / length) >= this parameter, the binary sequence is considered not good.

    :param search_count: Not described in the original documentation; stored as self.search_count.

    :param max_homopolymer: Maximum length of homopolymer.

    :param max_content: Maximum content of C and G, which means GC content is in
                        [1 - max_content, max_content].

    :param min_free_energy: The free energy of a DNA sequence is lower than the required min free energy.
    """
    # Missing (or empty) rules fall back to the built-in rules 1 and 2 (RULE 495).
    base_reference = base_reference or [0, 1, 0, 1]
    current_code_matrix = current_code_matrix or [
        [1, 1, 0, 0],
        [1, 0, 0, 1],
        [1, 1, 0, 0],
        [1, 1, 0, 0],
    ]
    support_bases = support_bases or [index_base[0]]

    # Rules
    self.base_reference = base_reference
    self.current_code_matrix = current_code_matrix
    self.support_bases = support_bases
    self.support_spacing = support_spacing

    # Screening thresholds
    self.max_ratio = max_ratio
    self.search_count = search_count
    self.max_homopolymer = max_homopolymer
    self.max_content = max_content
    self.min_free_energy = min_free_energy

    # Validate the parameters before the remaining state is created.
    self._init_check()

    self.file_size = 0
    self.monitor = monitor.Monitor()