def encode(self, bit_segments):
    """
    Encode bit segments into DNA sequences by random pairing.

    Segments are popped from the end of ``bit_segments`` one at a time. Each
    popped segment is paired with randomly selected remaining segments, for at
    most ``self.max_iterations`` attempts. Every pair is transcoded into two
    candidate strands and the first candidate accepted by ``screen.check`` is
    kept; its partner is then removed from ``bit_segments``. A segment for
    which no pairing succeeds is handed to ``self.addition``.

    Side effects: ``bit_segments`` is consumed in place (it is empty on
    return), and ``self.index_length`` / ``self.total_count`` are set from its
    initial length.

    :param bit_segments: list of bit segments; it is emptied by this call.
    :return: list of encoded DNA sequences.
    """
    dna_sequences = []

    # Number of binary digits needed to write the segment count.
    self.index_length = len(bin(len(bit_segments))) - 2
    self.total_count = len(bit_segments)

    while len(bit_segments) > 0:
        fixed_bit_segment = bit_segments.pop()
        is_finish = False

        for _ in range(self.max_iterations):
            if len(bit_segments) == 0:
                # No partner left, so further attempts cannot succeed.
                break

            selected_index = random.randint(0, len(bit_segments) - 1)
            selected_bit_segment = bit_segments[selected_index]

            # Build both candidate strands in lock-step; each strand feeds its
            # own previous nucleotide back into the transcoding rule.
            dna_sequence = [[], []]
            support_nucleotide_1 = self.virtual_nucleotide
            support_nucleotide_2 = self.virtual_nucleotide
            for bit_1, bit_2 in zip(fixed_bit_segment, selected_bit_segment):
                current_nucleotide_1 = self._bits_to_nucleotide(
                    bit_1, bit_2, support_nucleotide_1)
                current_nucleotide_2 = self._bits_to_nucleotide(
                    bit_2, bit_1, support_nucleotide_2)
                dna_sequence[0].append(current_nucleotide_1)
                dna_sequence[1].append(current_nucleotide_2)
                support_nucleotide_1 = current_nucleotide_1
                support_nucleotide_2 = current_nucleotide_2

            # The second strand is screened only when the first is rejected.
            accepted = None
            for candidate in dna_sequence:
                if screen.check("".join(candidate),
                                max_homopolymer=self.max_homopolymer,
                                max_content=self.max_content):
                    accepted = candidate
                    break

            if accepted is not None:
                is_finish = True
                dna_sequences.append(accepted)
                del bit_segments[selected_index]
                break

        # additional information
        if not is_finish:
            # ``addition`` requires the total segment count as well; omitting
            # it raised TypeError for every segment that could not be paired.
            dna_sequences.append(
                self.addition(fixed_bit_segment, self.total_count))

        if self.need_logs:
            self.monitor.output(self.total_count - len(bit_segments),
                                self.total_count)

    if self.need_logs:
        print("There are " +
              str(len(dna_sequences) * 2 - self.total_count) +
              " random bit segment(s) adding for reliability.")

    return dna_sequences
def faster_encode(self, bit_segments):
    """
    Encode bit segments by random pairing without any pre-sorting.

    Each segment popped from the end of ``bit_segments`` is matched against
    randomly drawn remaining segments, up to ``self.max_iterations`` times.
    A pair yields two candidate strands; the first one accepted by
    ``screen.check`` is stored and the partner is removed from
    ``bit_segments``. If no attempt succeeds, the segment is passed to
    ``self.addition`` together with ``self.total_count``.

    ``bit_segments`` is consumed in place. ``self.total_count`` is read but
    not set here, so it must already be initialised by the caller.

    :param bit_segments: list of bit segments; it is emptied by this call.
    :return: list of encoded DNA sequences.
    """
    if self.need_logs:
        print("Faster setting may increases the number of additional binary segments "
              "(3 ~ 4 times than that of normal setting).")

    encoded = []
    while bit_segments:
        anchor = bit_segments.pop()
        paired = False

        for _ in range(self.max_iterations):
            if not bit_segments:
                # Nothing left to pair with; remaining attempts are no-ops.
                break

            partner_position = random.randint(0, len(bit_segments) - 1)
            partner = bit_segments[partner_position]

            # Transcode both orientations of the pair side by side.
            strand_a, strand_b = [], []
            previous_a = previous_b = self.virtual_nucleotide
            for anchor_bit, partner_bit in zip(anchor, partner):
                previous_a = self._bits_to_nucleotide(
                    anchor_bit, partner_bit, previous_a)
                previous_b = self._bits_to_nucleotide(
                    partner_bit, anchor_bit, previous_b)
                strand_a.append(previous_a)
                strand_b.append(previous_b)

            # Screen the first strand, then the second only if needed.
            chosen = None
            for strand in (strand_a, strand_b):
                if screen.check("".join(strand),
                                max_homopolymer=self.max_homopolymer,
                                max_content=self.max_content):
                    chosen = strand
                    break

            if chosen is None:
                continue

            paired = True
            encoded.append(chosen)
            del bit_segments[partner_position]
            break

        # additional information
        if not paired:
            encoded.append(self.addition(anchor, self.total_count))

        if self.need_logs:
            self.monitor.output(self.total_count - len(bit_segments),
                                self.total_count)

    return encoded
def addition(self, fixed_bit_segment, total_count):
    """
    Encode an unpaired bit segment against a synthetic partner segment.

    A random index in ``[total_count + 3, 2 ** self.index_length - 1]`` is
    drawn and written as ``self.index_length`` bits; these bits are paired
    with the first ``self.index_length`` bits of ``fixed_bit_segment``. For
    every remaining bit of ``fixed_bit_segment`` the partner bit is chosen
    freely (0 is tried before 1) so that the growing strand keeps passing
    ``screen.check``. Two strands are grown in parallel, one per orientation
    of the pair, and a strand is abandoned as soon as neither partner bit is
    acceptable. The first surviving strand that passes the final screening is
    returned; otherwise a new random index is drawn and the process repeats.

    The method loops until a valid strand is found, so it does not return if
    none exists.

    :param fixed_bit_segment: bit segment that could not be paired.
    :param total_count: number of original bit segments; the random index is
        drawn strictly above it.
    :return: the accepted DNA sequence as a list of nucleotides.
    :raises ValueError: from ``random.randint`` when ``total_count + 3``
        exceeds ``2 ** self.index_length - 1`` (empty range).
    """
    # Integer arithmetic on purpose: math.pow() returns a float, and
    # random.randint() rejects non-integer bounds on Python 3.12+.
    largest_index = 2 ** self.index_length - 1

    while True:
        # insert at least 2 interval.
        random_index = random.randint(total_count + 3, largest_index)
        random_segment = [
            int(digit)
            for digit in bin(random_index)[2:].zfill(self.index_length)
        ]

        dna_sequence = [[], []]
        support_nucleotide_1 = self.virtual_nucleotide
        support_nucleotide_2 = self.virtual_nucleotide

        # Index part: both bits of every pair are fixed.
        for bit_1, bit_2 in zip(fixed_bit_segment[:self.index_length],
                                random_segment):
            current_nucleotide_1 = self._bits_to_nucleotide(
                bit_1, bit_2, support_nucleotide_1)
            current_nucleotide_2 = self._bits_to_nucleotide(
                bit_2, bit_1, support_nucleotide_2)
            dna_sequence[0].append(current_nucleotide_1)
            dna_sequence[1].append(current_nucleotide_2)
            support_nucleotide_1 = current_nucleotide_1
            support_nucleotide_2 = current_nucleotide_2

        # Payload part: the partner bit is free, so pick one that keeps the
        # strand valid. work_flags[i] is False once strand i is abandoned.
        work_flags = [True, True]
        for fixed_bit in fixed_bit_segment[self.index_length:]:
            if not any(work_flags):
                # Both strands are dead; retry with a new random index.
                break

            current_nucleotide_1, current_nucleotide_2 = None, None
            for bit in [0, 1]:
                if work_flags[0] and current_nucleotide_1 is None:
                    current_nucleotide_1 = self._bits_to_nucleotide(
                        fixed_bit, bit, support_nucleotide_1)
                    if not screen.check(
                            "".join(dna_sequence[0]) + current_nucleotide_1,
                            max_homopolymer=self.max_homopolymer,
                            max_content=self.max_content):
                        current_nucleotide_1 = None

                if work_flags[1] and current_nucleotide_2 is None:
                    current_nucleotide_2 = self._bits_to_nucleotide(
                        bit, fixed_bit, support_nucleotide_2)
                    if not screen.check(
                            "".join(dna_sequence[1]) + current_nucleotide_2,
                            max_homopolymer=self.max_homopolymer,
                            max_content=self.max_content):
                        current_nucleotide_2 = None

            if current_nucleotide_1 is None:
                work_flags[0] = False
                dna_sequence[0] = None
            else:
                dna_sequence[0].append(current_nucleotide_1)
                support_nucleotide_1 = current_nucleotide_1

            if current_nucleotide_2 is None:
                work_flags[1] = False
                dna_sequence[1] = None
            else:
                dna_sequence[1].append(current_nucleotide_2)
                support_nucleotide_2 = current_nucleotide_2

        for potential_dna_sequence in dna_sequence:
            if potential_dna_sequence is not None and screen.check(
                    "".join(potential_dna_sequence),
                    max_homopolymer=self.max_homopolymer,
                    max_content=self.max_content):
                return potential_dna_sequence
def encode(self, bit_segments):
    """
    Encode bit segments into DNA sequences with the fountain scheme.

    Odd-length segments are left-padded with a single 0 (``bit_segments`` is
    updated in place). Seeds are then drawn from the LFSR generator; each seed
    produces one droplet, which is kept only if its DNA sequence passes
    ``screen.check``. Generation stops once
    ``ceil(len(bit_segments) * (1 + self.redundancy))`` sequences are accepted.

    When ``self.need_pre_check`` is set, the method verifies that every
    segment index occurs in at least one accepted droplet and then runs
    ``self.decode`` on the result.

    Side effects: sets ``self.decode_packets`` and ``self.prng``.

    :param bit_segments: list of bit segments (lists of bits).
    :return: list of accepted DNA sequences.
    :raises ValueError: if the pre-check finds a segment that is never
        encoded, or ``self.decode`` raises ``ValueError``.
    """
    for segment_index, bit_segment in enumerate(bit_segments):
        if len(bit_segment) % 2 != 0:
            bit_segments[segment_index] = [0] + bit_segment

    self.decode_packets = len(bit_segments)

    dna_sequences = []
    final_count = math.ceil(len(bit_segments) * (1 + self.redundancy))

    # things related to random number generator, starting an lfsr with a
    # certain state and a polynomial for 32bits.
    lfsr = DNAFountain.LFSR().lfsr_s_p()
    # create the soliton distribution object
    self.prng = DNAFountain.PRNG(number=self.decode_packets,
                                 delta=self.delta, c=self.c_dist)

    # Seeds of accepted droplets. The original never stored any seed, so the
    # duplicate check below could not trigger. Only accepted seeds are kept:
    # presumably a rejected seed yields the same rejected droplet again, and
    # this bounds memory by the output size.
    used_seeds = set()
    chuck_recorder = []
    while len(dna_sequences) < final_count:
        seed = next(lfsr)
        if seed in used_seeds:
            continue

        # initialize droplet and trans-code to DNA.
        droplet = DNAFountain.Droplet()
        dna_sequence = droplet.get_dna(seed, self.prng, bit_segments,
                                       self.header_size)

        # check validity.
        if screen.check("".join(dna_sequence),
                        max_homopolymer=self.homopolymer,
                        max_content=0.5 + self.gc_bias):
            used_seeds.add(seed)
            dna_sequences.append(dna_sequence)
            chuck_recorder.append(droplet.chuck_indices)

        if self.need_logs:
            self.monitor.output(len(dna_sequences), final_count)

    # pre-check the decoding process in the encoding process
    if self.need_pre_check:
        try:
            visited_indices = [0] * self.decode_packets
            for chuck_indices in chuck_recorder:
                for chuck_index in chuck_indices:
                    visited_indices[chuck_index] += 1

            no_visit_indices = [
                index for index, visited in enumerate(visited_indices)
                if visited == 0
            ]
            if no_visit_indices:
                raise ValueError("bit segment " + str(no_visit_indices) +
                                 " are not been encoded!")

            if self.need_logs:
                print("Pre-check the decoding process.")

            self.decode(dna_sequences)
        except ValueError as error:
            # Chain the cause so the specific failure stays visible in the
            # traceback.
            raise ValueError(
                "Based on the pre decoding operation, "
                "it is found that the encoded data does not meet the full rank condition."
                "Please increase \"redundancy\" or use compression to "
                "change the original digital data.") from error
    else:
        if self.need_logs:
            print(
                "We recommend that you test whether it can be decoded before starting the wet experiment."
            )

    return dna_sequences
def normal_encode(self, bit_segments):
    """
    Encode bit segments by pairing 'good' segments with 'bad' ones.

    A segment is 'bad' when its number of ones is above
    ``len(segment) * self.max_ratio`` or below
    ``len(segment) * (1 - self.max_ratio)``; every other segment is 'good'.
    While both groups are non-empty, a good segment is popped and paired with
    randomly drawn bad segments; once one group is exhausted, the remaining
    segments are paired within their own group. Each pair is transcoded into
    two candidate strands and the first one accepted by ``screen.check`` is
    kept. After ``self.max_iterations`` failed attempts (or when no partner
    is left) the segment is passed to ``self.addition``.

    ``bit_segments`` itself is not modified. ``self.total_count`` is read but
    not set here, so it must already be initialised by the caller.

    :param bit_segments: list of bit segments.
    :return: list of encoded DNA sequences.
    """
    dna_sequences = []

    if self.need_logs:
        print("Separate 'good' binary segments from 'bad' binary segments.")

    # Row indices of bad segments; a set keeps the partition pass linear.
    bad_rows = set()
    for row, bit_segment in enumerate(bit_segments):
        ones = numpy.sum(bit_segment)
        if ones > len(bit_segment) * self.max_ratio \
                or ones < len(bit_segment) * (1 - self.max_ratio):
            bad_rows.add(row)

    if len(bit_segments) < len(bad_rows) * 5:
        if self.need_logs:
            print(
                "There may be a large number of sequences that are difficult for synthesis or sequencing. "
                + "We recommend you to re-select the rule or take a new run."
            )

    if len(bit_segments) == 0:
        return []

    # One pass replaces the three original branches (all good, all bad,
    # mixed); original order is kept within each group.
    good_data, bad_data = [], []
    for row, bit_segment in enumerate(bit_segments):
        if self.need_logs:
            self.monitor.output(row + 1, len(bit_segments))
        if row in bad_rows:
            bad_data.append(bit_segment)
        else:
            good_data.append(bit_segment)

    if self.need_logs:
        print("Encode based on random pair iteration.")

    while len(good_data) + len(bad_data) > 0:
        # Good segments are consumed first. Partners come from the bad group
        # while it has members, otherwise from the segment's own group.
        if len(good_data) > 0:
            partner_pool = bad_data if len(bad_data) > 0 else good_data
            fixed_bit_segment = good_data.pop()
        else:
            fixed_bit_segment = bad_data.pop()
            partner_pool = bad_data
        is_finish = False

        for _ in range(self.max_iterations):
            if len(partner_pool) == 0:
                break

            selected_index = random.randint(0, len(partner_pool) - 1)
            selected_bit_segment = partner_pool[selected_index]

            dna_sequence = [[], []]
            support_nucleotide_1 = self.virtual_nucleotide
            support_nucleotide_2 = self.virtual_nucleotide
            for bit_1, bit_2 in zip(fixed_bit_segment, selected_bit_segment):
                current_nucleotide_1 = self._bits_to_nucleotide(
                    bit_1, bit_2, support_nucleotide_1)
                current_nucleotide_2 = self._bits_to_nucleotide(
                    bit_2, bit_1, support_nucleotide_2)
                dna_sequence[0].append(current_nucleotide_1)
                dna_sequence[1].append(current_nucleotide_2)
                support_nucleotide_1 = current_nucleotide_1
                support_nucleotide_2 = current_nucleotide_2

            # The second strand is screened only when the first is rejected.
            accepted = None
            for candidate in dna_sequence:
                if screen.check("".join(candidate),
                                max_homopolymer=self.max_homopolymer,
                                max_content=self.max_content):
                    accepted = candidate
                    break

            if accepted is not None:
                is_finish = True
                dna_sequences.append(accepted)
                del partner_pool[selected_index]
                break

        # additional information
        if not is_finish:
            dna_sequences.append(
                self.addition(fixed_bit_segment, self.total_count))

        if self.need_logs:
            self.monitor.output(
                self.total_count - (len(good_data) + len(bad_data)),
                self.total_count)

    return dna_sequences