def write_bits_to_file(path, matrix, bit_size, need_logs=False):
    """Pack a binary matrix into bytes and write them to a file.

    The matrix is read row by row, most significant bit first. Only whole
    bytes are written: ``bit_size // 8`` bytes at most, and a trailing group
    of fewer than 8 bits is dropped.

    Args:
        path: Destination file path; opened with mode ``"wb+"``.
        matrix: Two-dimensional sequence of 0/1 values. Each row is read up
            to the width of the first row.
        bit_size: Number of meaningful bits in the matrix.
        need_logs: When true, print a header line and report per-row
            progress through a ``Monitor``.

    Returns:
        Always ``True``.
    """
    # The monitor is only needed for progress output, so build it on demand.
    monitor = Monitor() if need_logs else None

    # Floor division keeps the byte budget an exact integer for any size.
    bytes_left = bit_size // 8

    with open(path, "wb+") as file:
        if need_logs:
            print("Write file from binary matrix: " + path)

        width = len(matrix[0]) if len(matrix) > 0 else 0
        packed = []
        value = 0
        filled = 0
        for row_index, row in enumerate(matrix):
            # Once the byte budget is spent the remaining rows are padding.
            if bytes_left > 0:
                for bit in row[:width]:
                    value = value * 2 + bit
                    filled += 1
                    if filled == 8:
                        packed.append(struct.pack("B", int(value)))
                        value = 0
                        filled = 0
                        bytes_left -= 1
                        if bytes_left == 0:
                            break

            if need_logs:
                monitor.output(row_index + 1, len(matrix))

        # One buffered write instead of one write call per byte.
        file.write(b"".join(packed))

    return True
def read_bits_from_file(path, segment_length=120, need_logs=False):
    """Read a file byte by byte and return its bits as a matrix.

    Args:
        path: File to read, opened in binary mode.
        segment_length: Number of bits per matrix row. When it is not
            positive, all bits are returned as a single row.
        need_logs: When true, print a header, report per-byte progress
            through a ``Monitor`` (segmented mode only) and allow the
            index-proportion warning to be emitted.

    Returns:
        A tuple ``(matrix, bit_count)`` where ``bit_count`` is eight times
        the file size reported by ``os.path.getsize``. In segmented mode the
        last row is zero-padded.
    """
    progress = Monitor()

    with open(path, mode="rb") as handle:
        if need_logs:
            print("Read binary matrix from file: " + path)

        size = os.path.getsize(path)

        if segment_length > 0:
            # Pre-allocate a zero-filled matrix; the tail stays as padding.
            row_count = math.ceil(size * 8 / segment_length)
            matrix = [[0] * segment_length for _ in range(row_count)]

            position = 0
            for byte_index in range(size):
                value = struct.unpack("B", handle.read(1))[0]
                for digit in bin(value)[2:].zfill(8):
                    target_row, target_col = divmod(position, segment_length)
                    matrix[target_row][target_col] = int(digit)
                    position += 1

                if need_logs:
                    progress.output(byte_index + 1, size)
        else:
            flat_bits = []
            for _ in range(size):
                value = struct.unpack("B", handle.read(1))[0]
                flat_bits.extend(int(digit) for digit in bin(value)[2:].zfill(8))
            matrix = [flat_bits]

    # Four times the binary width of the row count, compared to the row size.
    index_cost = (len(bin(len(matrix))) - 2) * 4
    if index_cost > segment_length and need_logs:
        warn(
            "The proportion of index in whole sequence may be high. \n"
            "It is recommended to increase the length of output DNA sequences "
            "or to divide the file into more segment pools")

    return matrix, size * 8
def write_dna_file(path, dna_sequences, need_logs=False):
    """Write DNA sequences to a text file, one sequence per line.

    Args:
        path: Destination file path; opened with mode ``"w"``.
        dna_sequences: Iterable of sequences; each one is joined with
            ``"".join`` before being written.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each sequence.

    Returns:
        Always ``True``.
    """
    progress = Monitor()

    with open(path, "w") as handle:
        if need_logs:
            print("Write DNA sequences to file: " + path)

        for written, sequence in enumerate(dna_sequences, start=1):
            handle.write(f"{''.join(sequence)}\n")

            if need_logs:
                progress.output(written, len(dna_sequences))

    return True
def connect_all(bit_segments, need_logs=False):
    """Attach an index to every bit segment using ``connect``.

    The index width is the number of binary digits needed to write the
    segment count (``len(bin(count)) - 2``).

    Args:
        bit_segments: Sequence of bit segments.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each segment.

    Returns:
        A tuple ``(connected_bit_segments, index_binary_length)``.
    """
    segment_count = len(bit_segments)
    index_binary_length = len(bin(segment_count)) - 2

    if need_logs:
        print("Add index in the binary matrix.")

    progress = Monitor()
    connected_bit_segments = []
    for row, segment in enumerate(bit_segments):
        connected = connect(row, segment, index_binary_length)
        connected_bit_segments.append(connected)

        if need_logs:
            progress.output(row + 1, segment_count)

    return connected_bit_segments, index_binary_length
def write_bits_to_str(matrix, bit_size, need_logs=False):
    """Decode a binary matrix into a UTF-8 string.

    Rows are concatenated, truncated to ``bit_size`` bits, grouped into
    chunks of 8 bits (most significant bit first) and decoded as UTF-8.

    Args:
        matrix: Iterable of rows, each a list of 0/1 values.
        bit_size: Number of meaningful bits; anything after it is padding.
        need_logs: When true, print a header line and report per-row
            progress through a ``Monitor``.

    Returns:
        The decoded string.

    Raises:
        UnicodeDecodeError: If the resulting bytes are not valid UTF-8.
    """
    if need_logs:
        print("Write binary matrix to string.")

    # The monitor is only needed for progress output, so build it on demand.
    monitor = Monitor() if need_logs else None

    bits = []
    for index, row in enumerate(matrix):
        bits += row

        if need_logs:
            # Report 1-based progress so the last row reaches the total.
            monitor.output(index + 1, len(matrix))

    bits = bits[:bit_size]

    values = [
        int("".join(map(str, bits[start:start + 8])), 2)
        for start in range(0, len(bits), 8)
    ]

    return str(bytes(values), encoding="utf8")
def divide_all(bit_segments, need_logs=False):
    """Split every bit segment into its index and its data using ``divide``.

    The index width is the number of binary digits needed to write the
    segment count (``len(bin(count)) - 2``).

    Args:
        bit_segments: Sequence of indexed bit segments.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each segment.

    Returns:
        A tuple ``(indices, divided_matrix)`` with one entry per segment.
    """
    segment_count = len(bit_segments)
    index_binary_length = len(bin(segment_count)) - 2

    if need_logs:
        print("Divide index and data from binary matrix.")

    progress = Monitor()
    indices, divided_matrix = [], []
    for position, segment in enumerate(bit_segments, start=1):
        index, data = divide(segment, index_binary_length)
        indices.append(index)
        divided_matrix.append(data)

        if need_logs:
            progress.output(position, segment_count)

    return indices, divided_matrix
def sort_order(indices, bit_segments, need_logs=False):
    """Reorder bit segments by index, filling missing indices with zeros.

    The result has ``max(indices) + 1`` rows. Row ``i`` is the segment whose
    index is ``i`` (the first one when an index is duplicated), or a
    zero-filled row as wide as ``bit_segments[0]`` when no segment carries
    that index.

    Args:
        indices: List of integer indices, parallel to ``bit_segments``.
        bit_segments: List of bit segments.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each output row.

    Returns:
        The list of segments in index order; an empty list when ``indices``
        is empty.
    """
    # The monitor is only needed for progress output, so build it on demand.
    monitor = Monitor() if need_logs else None

    if need_logs:
        print("Restore data order according to index.")

    if len(indices) == 0:
        return []

    # Map each index to its first position once, instead of scanning the
    # index list for every output row.
    first_position = {}
    for position, index in enumerate(indices):
        first_position.setdefault(index, position)

    total = max(indices) + 1

    sorted_bit_segments = []
    for index in range(total):
        position = first_position.get(index)
        if position is None:
            # A fresh list per gap so filler rows never alias each other.
            sorted_bit_segments.append([0] * len(bit_segments[0]))
        else:
            sorted_bit_segments.append(bit_segments[position])

        if need_logs:
            monitor.output(index + 1, total)

    return sorted_bit_segments
def write_bits_to_file(path, matrix, bit_size, need_logs=True):
    """Flatten a binary matrix and write it to a file, 8 bits per byte.

    Args:
        path: Destination file path; opened with mode ``"wb+"``.
        matrix: Array-like of 0/1 values; flattened with ``reshape(-1)``.
        bit_size: Number of bits to write, consumed in steps of 8.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each byte.

    Returns:
        Always ``True``.
    """
    progress = Monitor()

    with open(path, "wb+") as handle:
        if need_logs:
            print("Write file from binary matrix: " + path)

        flat_bits = array(matrix).reshape(-1)
        for start in range(0, bit_size, 8):
            # Render the 8-bit window as a binary literal, then pack it.
            digits = "".join(str(bit) for bit in flat_bits[start:start + 8])
            handle.write(struct.pack("B", int(digits, 2)))

            if need_logs:
                progress.output(int(start / 8 + 1), int(bit_size / 8))

    return True
def read_dna_file(path, need_logs=True):
    """Read DNA sequences from a text file, one sequence per line.

    Args:
        path: File to read, opened in text mode.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each line.

    Returns:
        A list with one entry per line; each entry is the list of that
        line's characters with newline characters removed.
    """
    progress = Monitor()
    dna_sequences = []

    with open(path, "r") as handle:
        if need_logs:
            print("Read DNA sequences from file: " + path)

        # Load every line up front so the total is known for progress.
        lines = handle.readlines()
        line_count = len(lines)
        for number, line in enumerate(lines, start=1):
            characters = list(line.replace("\n", ""))
            dna_sequences.append(characters)

            if need_logs:
                progress.output(number, line_count)

    return dna_sequences
def read_bits_from_str(string, segment_length=120, need_logs=False):
    """Encode a string as UTF-8 and split its bits into fixed-size rows.

    Args:
        string: Text to convert.
        segment_length: Number of bits per row; the last row is padded with
            zeros up to this length.
        need_logs: When true, print a header line and report progress
            through a ``Monitor`` after each row.

    Returns:
        A tuple ``(matrix, bit_count)`` where ``bit_count`` is the number of
        bits before padding.
    """
    # The monitor is only needed for progress output, so build it on demand.
    monitor = Monitor() if need_logs else None

    if need_logs:
        print("Read binary matrix from string: " + string)

    data = []
    for value in bytes(string, encoding="utf8"):
        data.extend(int(digit) for digit in format(value, "08b"))

    total = len(data)
    matrix = []
    for index in range(0, total, segment_length):
        segment = data[index:index + segment_length]
        # Only the final segment can be short; pad it with zeros.
        segment += [0] * (segment_length - len(segment))
        matrix.append(segment)

        if need_logs:
            # Clamp to the total so progress never overshoots it.
            monitor.output(min(index + segment_length, total), total)

    return matrix, total
def read_bits_from_file(path, segment_length=120, need_logs=True):
    """Load a file as unsigned bytes and return its bits as a matrix.

    Args:
        path: File to read; loaded with ``fromfile`` as ``uint8`` values.
        segment_length: Number of bits per matrix row; the bit stream is
            zero-padded to a multiple of this length.
        need_logs: When true, print a header, report per-byte progress
            through a ``Monitor`` and print the total bit count.

    Returns:
        A tuple ``(matrix, bit_count)`` where ``matrix`` is a list of rows
        and ``bit_count`` is eight times the number of bytes read.
    """
    progress = Monitor()

    if need_logs:
        print("Read binary matrix from file: " + path)

    values = fromfile(file=path, dtype=uint8)
    byte_count = len(values)

    bits = []
    for number, value in enumerate(values, start=1):
        bits.extend(int(digit) for digit in bin(value)[2:].zfill(8))

        if need_logs:
            progress.output(number, byte_count)

    # Pad the tail so the stream divides evenly into rows.
    overflow = len(bits) % segment_length
    if overflow != 0:
        bits.extend([0] * (segment_length - overflow))

    grid = array(bits).reshape(len(bits) // segment_length, segment_length)

    if need_logs:
        print(
            "There are " + str(byte_count * 8) + " bits in the inputted file. "
            + "Please keep this information in mind if you do not consider storing the model in serialization!"
        )

    return grid.tolist(), byte_count * 8
class AbstractErrorCorrectionCode(object):
    """Base class for error-correction codes applied to bit segments.

    Subclasses implement ``insert_one`` (add the code to one segment) and
    ``remove_one`` (check and strip the code from one segment, returning a
    dict with a ``"data"`` entry and a truthy/falsy ``"type"`` entry).
    """

    def __init__(self, need_logs):
        """Store the logging flag and create the progress monitor.

        Args:
            need_logs: When true, ``insert`` and ``remove`` print messages
                and report progress through the monitor.
        """
        self.need_logs = need_logs
        # Length of the segments seen by the last ``insert``; None until then.
        self.segment_length = None
        self.monitor = Monitor()

    def insert(self, bit_segments):
        """Add the error-correction code to one segment or a list of segments.

        Args:
            bit_segments: Either a non-empty list of bit lists (2-D) or a
                non-empty list of ints (1-D).

        Returns:
            A tuple ``(verified_bit_segments, added_length)`` where
            ``added_length`` is how many bits the code added to a segment
            (measured on the first segment for 2-D input).

        Raises:
            ValueError: If the input is neither of the accepted shapes,
                including an empty list.
        """
        if self.need_logs:
            print("Insert the error-correction code to the bit segments.")

        is_filled_list = isinstance(bit_segments, list) and len(bit_segments) > 0

        if is_filled_list and isinstance(bit_segments[0], list):
            self.segment_length = len(bit_segments[0])
            verified_bit_segments = []
            for index, bit_segment in enumerate(bit_segments):
                verified_bit_segments.append(self.insert_one(bit_segment))

                if self.need_logs:
                    self.monitor.output(index + 1, len(bit_segments))
            added_length = len(verified_bit_segments[0]) - len(bit_segments[0])
        elif is_filled_list and isinstance(bit_segments[0], int):
            self.segment_length = len(bit_segments)
            verified_bit_segments = self.insert_one(bit_segments)
            # For a single segment the lists themselves carry the lengths;
            # their first elements are ints and have no length.
            added_length = len(verified_bit_segments) - len(bit_segments)
        else:
            raise ValueError(
                "The matrix must be 1-dimensional or 2-dimensional, and the value is of type \"int\"."
            )

        if self.need_logs:
            print(
                "The error-correction code automatically records the original length ("
                + str(self.segment_length)
                + ") of each binary segment, which provide more information when an error really occurs."
            )
            print("We recommend you save this code through serialized object!")
            print(
                "Generally, ignoring it during decoding does not cause more problems."
            )

        return verified_bit_segments, added_length

    def remove(self, verified_bit_segments):
        """Check and strip the error-correction code from the segments.

        Args:
            verified_bit_segments: Either a non-empty list whose entries are
                bit lists or ``None`` (2-D), or a non-empty list of ints
                (1-D). A ``None`` entry is counted as an error.

        Returns:
            A dict with keys ``"bit"`` (recovered segments; a flat list for
            1-D input), ``"e_r"`` (error rate), ``"e_i"`` (indices of failed
            segments) and ``"e_bit"`` (data of failed segments, ``None`` for
            missing ones).

        Raises:
            ValueError: If the input is neither of the accepted shapes,
                including an empty list.
        """
        if self.need_logs:
            print(
                "Check and remove the error-correction code from the bit segments."
            )

        bit_segments = []
        error_bit_segments = []
        error_indices = []

        is_filled_list = (isinstance(verified_bit_segments, list)
                          and len(verified_bit_segments) > 0)
        # Decide the shape from the first segment that is present, so a
        # missing first segment does not hide a valid 2-D input.
        sample = None
        if is_filled_list:
            sample = next(
                (segment for segment in verified_bit_segments
                 if segment is not None),
                None)

        if is_filled_list and (sample is None or isinstance(sample, list)):
            error_rate = 0
            for index, verified_bit_segment in enumerate(verified_bit_segments):
                data, data_type = None, False
                if verified_bit_segment is not None:
                    output = self.remove_one(verified_bit_segment)
                    data, data_type = output.get("data"), output.get("type")

                if data_type:
                    bit_segments.append(self._original_part(data))
                else:
                    error_rate += 1
                    error_indices.append(index)
                    error_bit_segments.append(data)

                if self.need_logs:
                    self.monitor.output(index + 1, len(verified_bit_segments))

            error_rate /= len(verified_bit_segments)
        elif is_filled_list and isinstance(verified_bit_segments[0], int):
            # Pass the whole segment, not its first bit.
            output = self.remove_one(verified_bit_segments)
            data, data_type = output.get("data"), output.get("type")
            if data_type:
                error_rate = 0
                bit_segments = self._original_part(data)
            else:
                error_rate = 1
                error_indices.append(0)
                error_bit_segments.append(data)
        else:
            raise ValueError(
                "The matrix must be 1-dimensional or 2-dimensional, and the value is of type \"int\"."
            )

        return {
            "bit": bit_segments,
            "e_r": error_rate,
            "e_i": error_indices,
            "e_bit": error_bit_segments
        }

    def _original_part(self, data):
        """Return the trailing ``segment_length`` items of data, if recorded."""
        if self.segment_length is None:
            return data
        return data[len(data) - self.segment_length:]

    def insert_one(self, input_list):
        """Add the code to a single segment; must be overridden."""
        raise NotImplementedError(
            "\"insert_one\" interface needs to be implemented!")

    def remove_one(self, input_list):
        """Check and strip the code from a single segment; must be overridden."""
        raise NotImplementedError(
            "\"remove_one\" interface needs to be implemented!")
class AbstractErrorCorrectionCode(object):
    """Base class for error-correction codes applied to bit segments.

    Subclasses implement ``insert_one`` (add the code to one segment) and
    ``remove_one`` (check and strip the code from one segment, returning a
    dict with a ``"data"`` entry and a truthy/falsy ``"type"`` entry).
    """

    def __init__(self, need_logs):
        """Store the logging flag and create the progress monitor.

        Args:
            need_logs: When true, ``insert`` and ``remove`` print messages
                and report progress through the monitor.
        """
        self.need_logs = need_logs
        # Original length of each segment seen by the last ``insert``.
        self.segment_lengths = []
        self.monitor = Monitor()

    def insert(self, bit_segments):
        """Add the error-correction code to one segment or a list of segments.

        The original length of every segment is recorded in
        ``segment_lengths`` so ``remove`` can trim the decoded data.

        Args:
            bit_segments: Either a non-empty list of bit lists (2-D) or a
                non-empty list of ints (1-D).

        Returns:
            A tuple ``(verified_bit_segments, added_length)`` where
            ``added_length`` is how many bits the code added to a segment
            (measured on the first segment for 2-D input).

        Raises:
            ValueError: If the input is neither of the accepted shapes,
                including an empty list.
        """
        if self.need_logs:
            print("Insert the error-correction code to the bit segments.")

        self.segment_lengths = []
        is_filled_list = isinstance(bit_segments, list) and len(bit_segments) > 0

        if is_filled_list and isinstance(bit_segments[0], list):
            verified_bit_segments = []
            for index, bit_segment in enumerate(bit_segments):
                self.segment_lengths.append(len(bit_segment))
                verified_bit_segments.append(self.insert_one(bit_segment))

                if self.need_logs:
                    self.monitor.output(index + 1, len(bit_segments))
            added_length = len(verified_bit_segments[0]) - len(bit_segments[0])
        elif is_filled_list and isinstance(bit_segments[0], int):
            self.segment_lengths = [len(bit_segments)]
            verified_bit_segments = self.insert_one(bit_segments)
            # For a single segment the lists themselves carry the lengths;
            # their first elements are ints and have no length.
            added_length = len(verified_bit_segments) - len(bit_segments)
        else:
            raise ValueError("The matrix must be 1-dimensional or 2-dimensional, and the value is of type \"int\".")

        self.monitor.restore()

        return verified_bit_segments, added_length

    def remove(self, verified_bit_segments):
        """Check and strip the error-correction code from the segments.

        Args:
            verified_bit_segments: Either a non-empty list whose entries are
                bit lists or ``None`` (2-D), or a non-empty list of ints
                (1-D). A ``None`` entry is counted as an error.

        Returns:
            A dict with keys ``"bit"`` (list of recovered segments),
            ``"e_r"`` (error rate), ``"e_i"`` (indices of failed segments)
            and ``"e_bit"`` (data of failed segments, ``None`` for missing
            ones). When no original length was recorded for a segment, its
            decoded data is returned untrimmed.

        Raises:
            ValueError: If the input is neither of the accepted shapes,
                including an empty list.
        """
        if self.need_logs:
            print("Check and remove the error-correction code from the bit segments.")

        bit_segments = []
        error_bit_segments = []
        error_indices = []

        is_filled_list = (isinstance(verified_bit_segments, list)
                          and len(verified_bit_segments) > 0)
        # Decide the shape from the first segment that is present, so a
        # missing first segment does not hide a valid 2-D input.
        sample = None
        if is_filled_list:
            sample = next(
                (segment for segment in verified_bit_segments
                 if segment is not None),
                None)

        if is_filled_list and (sample is None or isinstance(sample, list)):
            error_rate = 0
            for index, verified_bit_segment in enumerate(verified_bit_segments):
                data, data_type = None, False
                if verified_bit_segment is not None:
                    output = self.remove_one(verified_bit_segment)
                    data, data_type = output.get("data"), output.get("type")

                expected = self._recorded_length(index)
                # Decoded data shorter than the original is treated as failed.
                if data_type and (expected is None or len(data) >= expected):
                    bit_segments.append(self._trim(data, expected))
                else:
                    error_rate += 1
                    error_indices.append(index)
                    error_bit_segments.append(data)

                if self.need_logs:
                    self.monitor.output(index + 1, len(verified_bit_segments))

            error_rate /= len(verified_bit_segments)
        elif is_filled_list and isinstance(verified_bit_segments[0], int):
            # Pass the whole segment, not its first bit.
            output = self.remove_one(verified_bit_segments)
            data, data_type = output.get("data"), output.get("type")
            if data_type:
                error_rate = 0
                bit_segments = [self._trim(data, self._recorded_length(0))]
            else:
                error_rate = 1
                error_indices.append(0)
                error_bit_segments.append(data)
        else:
            raise ValueError("The matrix must be 1-dimensional or 2-dimensional, and the value is of type \"int\".")

        self.monitor.restore()

        return {"bit": bit_segments, "e_r": error_rate, "e_i": error_indices, "e_bit": error_bit_segments}

    def _recorded_length(self, index):
        """Return the original length recorded for a segment, or None."""
        if index < len(self.segment_lengths):
            return self.segment_lengths[index]
        return None

    @staticmethod
    def _trim(data, expected):
        """Return the trailing ``expected`` items of data (all if None)."""
        if expected is None:
            return data
        return data[len(data) - expected:]

    def insert_one(self, input_list):
        """Add the code to a single segment; must be overridden."""
        raise NotImplementedError("\"insert_one\" interface needs to be implemented!")

    def remove_one(self, input_list):
        """Check and strip the code from a single segment; must be overridden."""
        raise NotImplementedError("\"remove_one\" interface needs to be implemented!")