Ejemplo n.º 1
0
    def encode(self, bit_segments):
        """Encode binary segments into DNA sequences by random pairing.

        The last remaining segment is removed and combined, bit by bit, with
        a randomly chosen remaining segment.  Every attempt builds two
        candidate strands (the second one with the roles of the two segments
        swapped); the first candidate accepted by ``screen.check`` is kept
        and the partner segment is removed from the pool.  When no attempt
        out of ``self.max_iterations`` succeeds, or no partner is left, the
        segment is handed to ``self.addition`` instead.

        Side effects:
            * ``self.index_length`` is set to the number of binary digits of
              ``len(bit_segments)`` and ``self.total_count`` to
              ``len(bit_segments)``.
            * ``bit_segments`` is consumed in place and is empty on return.

        Args:
            bit_segments: mutable list of bit sequences to encode.

        Returns:
            A list with one entry per produced strand: either a list of the
            values returned by ``self._bits_to_nucleotide`` or whatever
            ``self.addition`` returned for an unpaired segment.
        """
        dna_sequences = []

        # bin() yields "0b..." so dropping two characters leaves the digits.
        self.index_length = len(bin(len(bit_segments))) - 2
        self.total_count = len(bit_segments)

        while bit_segments:
            fixed_bit_segment = bit_segments.pop()
            is_finish = False

            for _ in range(self.max_iterations):
                # The pool only shrinks right before we leave this loop, so
                # an empty pool stays empty: stop instead of idling.
                if not bit_segments:
                    break

                selected_index = random.randint(0, len(bit_segments) - 1)
                selected_bit_segment = bit_segments[selected_index]

                strand_1, strand_2 = [], []
                support_nucleotide_1 = self.virtual_nucleotide
                support_nucleotide_2 = self.virtual_nucleotide
                for bit_1, bit_2 in zip(fixed_bit_segment,
                                        selected_bit_segment):
                    support_nucleotide_1 = self._bits_to_nucleotide(
                        bit_1, bit_2, support_nucleotide_1)
                    support_nucleotide_2 = self._bits_to_nucleotide(
                        bit_2, bit_1, support_nucleotide_2)
                    strand_1.append(support_nucleotide_1)
                    strand_2.append(support_nucleotide_2)

                # The swapped strand is only screened when the first fails.
                for strand in (strand_1, strand_2):
                    if screen.check("".join(strand),
                                    max_homopolymer=self.max_homopolymer,
                                    max_content=self.max_content):
                        is_finish = True
                        dna_sequences.append(strand)
                        del bit_segments[selected_index]
                        break

                if is_finish:
                    break

            # additional information
            if not is_finish:
                # addition() needs the segment count to pick its random index,
                # exactly as the sibling encoders pass it.
                dna_sequences.append(
                    self.addition(fixed_bit_segment, self.total_count))

            if self.need_logs:
                self.monitor.output(self.total_count - len(bit_segments),
                                    self.total_count)

        if self.need_logs:
            print("There are " +
                  str(len(dna_sequences) * 2 - self.total_count) +
                  " random bit segment(s) adding for reliability.")

        return dna_sequences
Ejemplo n.º 2
0
    def faster_encode(self, bit_segments):
        """Encode binary segments by pairing randomly chosen segments.

        The last remaining segment is taken as the anchor and combined with a
        random partner from the pool, at most ``self.max_iterations`` times.
        Each attempt yields two candidate strands (the second with the two
        segments swapped); the first candidate accepted by ``screen.check``
        is stored and its partner leaves the pool.  An anchor that cannot be
        paired is passed to ``self.addition`` together with
        ``self.total_count``.

        ``bit_segments`` is consumed in place and is empty on return.
        ``self.total_count`` is read but not set here.

        Args:
            bit_segments: mutable list of bit sequences to encode.

        Returns:
            The list of produced strands, in creation order.
        """
        if self.need_logs:
            print(
                "Faster setting may increases the number of additional binary segments "
                + "(3 ~ 4 times than that of normal setting).")

        encoded = []

        while bit_segments:
            anchor = bit_segments.pop()
            accepted = None

            for _ in range(self.max_iterations):
                if not bit_segments:
                    # Nothing left to pair with; the pool cannot refill.
                    break

                partner_position = random.randint(0, len(bit_segments) - 1)
                partner = bit_segments[partner_position]

                candidates = ([], [])
                previous = [self.virtual_nucleotide, self.virtual_nucleotide]
                for anchor_bit, partner_bit in zip(anchor, partner):
                    previous[0] = self._bits_to_nucleotide(
                        anchor_bit, partner_bit, previous[0])
                    previous[1] = self._bits_to_nucleotide(
                        partner_bit, anchor_bit, previous[1])
                    candidates[0].append(previous[0])
                    candidates[1].append(previous[1])

                # Lazy generator: the swapped strand is screened only when
                # the first candidate is rejected.
                accepted = next(
                    (candidate for candidate in candidates
                     if screen.check("".join(candidate),
                                     max_homopolymer=self.max_homopolymer,
                                     max_content=self.max_content)),
                    None)
                if accepted is not None:
                    encoded.append(accepted)
                    del bit_segments[partner_position]
                    break

            # additional information
            if accepted is None:
                encoded.append(self.addition(anchor, self.total_count))

            if self.need_logs:
                self.monitor.output(self.total_count - len(bit_segments),
                                    self.total_count)

        return encoded
Ejemplo n.º 3
0
    def addition(self, fixed_bit_segment, total_count):
        """Build a valid strand for a segment that could not be paired.

        A random index is drawn from ``[total_count + 3,
        2 ** self.index_length - 1]`` and written as ``self.index_length``
        binary digits.  Those digits are paired with the first
        ``self.index_length`` bits of ``fixed_bit_segment``.  For every
        remaining bit the partner bit is chosen greedily (0 first, then 1)
        so that the strand built so far still passes ``screen.check``.  Two
        strands are grown in parallel, the second with the roles of the fixed
        and the synthetic bits swapped; a strand for which neither partner
        bit works is abandoned.  The first complete strand that passes
        ``screen.check`` is returned; otherwise a new random index is drawn
        and the procedure repeats until one succeeds.

        Args:
            fixed_bit_segment: bit sequence of the unpaired segment.
            total_count: number of original segments; the random index is
                drawn above it.

        Returns:
            The accepted strand as a list of the values returned by
            ``self._bits_to_nucleotide``.

        Raises:
            ValueError: raised by ``random.randint`` when
                ``total_count + 3`` exceeds ``2 ** self.index_length - 1``.
        """
        # insert at least 2 interval.
        lowest_index = total_count + 3
        # Integer power instead of math.pow(): math.pow returns a float and
        # random.randint rejects non-integer bounds on Python 3.12+.
        highest_index = 2 ** self.index_length - 1

        while True:
            random_index = random.randint(lowest_index, highest_index)
            random_segment = [
                int(digit)
                for digit in bin(random_index)[2:].zfill(self.index_length)
            ]

            dna_sequence = [[], []]
            support_nucleotide_1 = self.virtual_nucleotide
            support_nucleotide_2 = self.virtual_nucleotide

            # Index part: the partner bits are the digits of the random index.
            for bit_1, bit_2 in zip(fixed_bit_segment[:self.index_length],
                                    random_segment):
                current_nucleotide_1 = self._bits_to_nucleotide(
                    bit_1, bit_2, support_nucleotide_1)
                current_nucleotide_2 = self._bits_to_nucleotide(
                    bit_2, bit_1, support_nucleotide_2)
                dna_sequence[0].append(current_nucleotide_1)
                dna_sequence[1].append(current_nucleotide_2)
                support_nucleotide_1 = current_nucleotide_1
                support_nucleotide_2 = current_nucleotide_2

            # Remaining part: the partner bit is free, pick one that keeps the
            # growing strand acceptable.
            work_flags = [True, True]
            for fixed_bit in fixed_bit_segment[self.index_length:]:
                if not any(work_flags):
                    # Both strands were abandoned; nothing left to extend.
                    break

                current_nucleotide_1, current_nucleotide_2 = None, None
                for bit in [0, 1]:
                    if work_flags[0] and current_nucleotide_1 is None:
                        current_nucleotide_1 = self._bits_to_nucleotide(
                            fixed_bit, bit, support_nucleotide_1)
                        if not screen.check(
                                "".join(dna_sequence[0]) +
                                current_nucleotide_1,
                                max_homopolymer=self.max_homopolymer,
                                max_content=self.max_content):
                            current_nucleotide_1 = None
                    if work_flags[1] and current_nucleotide_2 is None:
                        current_nucleotide_2 = self._bits_to_nucleotide(
                            bit, fixed_bit, support_nucleotide_2)
                        if not screen.check(
                                "".join(dna_sequence[1]) +
                                current_nucleotide_2,
                                max_homopolymer=self.max_homopolymer,
                                max_content=self.max_content):
                            current_nucleotide_2 = None

                if current_nucleotide_1 is None:
                    # Neither partner bit works: give up on this strand.
                    work_flags[0] = False
                    dna_sequence[0] = None
                else:
                    dna_sequence[0].append(current_nucleotide_1)
                    support_nucleotide_1 = current_nucleotide_1

                if current_nucleotide_2 is None:
                    work_flags[1] = False
                    dna_sequence[1] = None
                else:
                    dna_sequence[1].append(current_nucleotide_2)
                    support_nucleotide_2 = current_nucleotide_2

            # Final screen of the complete strand (also covers segments that
            # have no bits beyond the index part).
            for potential_dna_sequence in dna_sequence:
                if potential_dna_sequence is not None and screen.check(
                        "".join(potential_dna_sequence),
                        max_homopolymer=self.max_homopolymer,
                        max_content=self.max_content):
                    return potential_dna_sequence
Ejemplo n.º 4
0
    def encode(self, bit_segments):
        """Encode binary segments into DNA sequences using seeded droplets.

        Odd-length segments are left-padded with a ``0`` (in place, inside
        ``bit_segments``).  Seeds are then drawn from an LFSR; for each new
        seed a droplet is trans-coded to DNA and kept when it passes
        ``screen.check``.  Generation stops once
        ``ceil(len(bit_segments) * (1 + self.redundancy))`` sequences have
        been accepted.  When ``self.need_pre_check`` is set, the method
        verifies that every segment index appears in at least one accepted
        droplet and then runs ``self.decode`` on the result.

        Side effects: sets ``self.decode_packets`` and ``self.prng`` and
        mutates odd-length entries of ``bit_segments``.

        Args:
            bit_segments: list of bit lists to encode.

        Returns:
            The list of accepted DNA sequences.

        Raises:
            ValueError: when the pre-check finds uncovered segments or
                ``self.decode`` raises ``ValueError``; the original error is
                attached as ``__cause__``.
        """
        for segment_index, bit_segment in enumerate(bit_segments):
            if len(bit_segment) % 2 != 0:
                bit_segments[segment_index] = [0] + bit_segment

        self.decode_packets = len(bit_segments)

        dna_sequences = []
        final_count = math.ceil(len(bit_segments) * (1 + self.redundancy))

        # things related to random number generator, starting an lfsr with a certain state and a polynomial for 32bits.
        lfsr = DNAFountain.LFSR().lfsr_s_p()
        # create the solition distribution object
        self.prng = DNAFountain.PRNG(number=self.decode_packets,
                                     delta=self.delta,
                                     c=self.c_dist)

        used_seeds = set()
        chuck_recorder = []
        while len(dna_sequences) < final_count:
            seed = next(lfsr)
            if seed in used_seeds:
                continue
            # Record the seed; without this the membership test above could
            # never be true.
            used_seeds.add(seed)

            # initialize droplet and trans-code to DNA.
            droplet = DNAFountain.Droplet()
            dna_sequence = droplet.get_dna(seed, self.prng, bit_segments,
                                           self.header_size)

            # check validity.
            if screen.check("".join(dna_sequence),
                            max_homopolymer=self.homopolymer,
                            max_content=0.5 + self.gc_bias):
                dna_sequences.append(dna_sequence)
                chuck_recorder.append(droplet.chuck_indices)

            if self.need_logs:
                self.monitor.output(len(dna_sequences), final_count)

        # pre-check the decoding process in the encoding process
        if self.need_pre_check:
            try:
                # Count how often each segment index occurs in the droplets.
                visited_indices = [0] * self.decode_packets
                for chuck_indices in chuck_recorder:
                    for chuck_index in chuck_indices:
                        visited_indices[chuck_index] += 1

                no_visit_indices = [
                    index for index, visited in enumerate(visited_indices)
                    if visited == 0
                ]
                if no_visit_indices:
                    raise ValueError("bit segment " + str(no_visit_indices) +
                                     " are not been encoded!")
                if self.need_logs:
                    print("Pre-check the decoding process.")
                self.decode(dna_sequences)
            except ValueError as error:
                # Chain the cause so the detailed reason is not lost.
                raise ValueError(
                    "Based on the pre decoding operation, "
                    "it is found that the encoded data does not meet the full rank condition."
                    "Please increase \"redundancy\" or use compression to "
                    "change the original digital data.") from error
        else:
            if self.need_logs:
                print(
                    "We recommend that you test whether it can be decoded before starting the wet experiment."
                )

        return dna_sequences
Ejemplo n.º 5
0
    def normal_encode(self, bit_segments):
        """Encode binary segments, pairing 'good' segments with 'bad' ones.

        A segment is 'bad' when its number of ones is greater than
        ``len(segment) * self.max_ratio`` or smaller than
        ``len(segment) * (1 - self.max_ratio)``; every other segment is
        'good'.  While both pools are non-empty a good segment is paired with
        a random bad one; once only one pool is left its segments are paired
        with each other.  Each attempt builds two candidate strands (the
        second with the two segments swapped) and keeps the first one
        accepted by ``screen.check``.  A segment that cannot be paired within
        ``self.max_iterations`` attempts, or has no partner left, is passed
        to ``self.addition`` together with ``self.total_count``.

        ``self.total_count`` is read but not set here.  ``bit_segments``
        itself is not modified.

        Args:
            bit_segments: sequence of bit sequences to encode.

        Returns:
            The list of produced strands; an empty list for empty input.
        """
        dna_sequences = []
        if self.need_logs:
            print(
                "Separate \'good\' binary segments from \'bad\' binary segments."
            )

        # Row numbers of the 'bad' segments; a set keeps the later membership
        # test constant-time.
        bad_rows = set()
        for row, bit_segment in enumerate(bit_segments):
            ones = numpy.sum(bit_segment)
            length = len(bit_segment)
            if ones > length * self.max_ratio \
                    or ones < length * (1 - self.max_ratio):
                bad_rows.add(row)

        if len(bit_segments) < len(bad_rows) * 5:
            if self.need_logs:
                print(
                    "There may be a large number of sequences that are difficult for synthesis or sequencing. "
                    +
                    "We recommend you to re-select the rule or take a new run."
                )

        if len(bit_segments) == 0:
            return []

        # Split into the two pools, preserving the original row order.
        good_data, bad_data = [], []
        segment_count = len(bit_segments)
        for row, bit_segment in enumerate(bit_segments):
            if self.need_logs:
                self.monitor.output(row + 1, segment_count)
            if row in bad_rows:
                bad_data.append(bit_segment)
            else:
                good_data.append(bit_segment)

        if self.need_logs:
            print("Encode based on random pair iteration.")

        while good_data or bad_data:
            # Choose the fixed segment and the pool its partner comes from:
            # good+bad while both exist, otherwise pair within the pool left.
            if good_data and bad_data:
                fixed_bit_segment, partner_pool = good_data.pop(), bad_data
            elif good_data:
                fixed_bit_segment, partner_pool = good_data.pop(), good_data
            else:
                fixed_bit_segment, partner_pool = bad_data.pop(), bad_data

            is_finish = False
            for _ in range(self.max_iterations):
                if not partner_pool:
                    break

                selected_index = random.randint(0, len(partner_pool) - 1)
                selected_bit_segment = partner_pool[selected_index]

                strand_1, strand_2 = [], []
                support_nucleotide_1 = self.virtual_nucleotide
                support_nucleotide_2 = self.virtual_nucleotide
                for bit_1, bit_2 in zip(fixed_bit_segment,
                                        selected_bit_segment):
                    support_nucleotide_1 = self._bits_to_nucleotide(
                        bit_1, bit_2, support_nucleotide_1)
                    support_nucleotide_2 = self._bits_to_nucleotide(
                        bit_2, bit_1, support_nucleotide_2)
                    strand_1.append(support_nucleotide_1)
                    strand_2.append(support_nucleotide_2)

                # The swapped strand is only screened when the first fails.
                for strand in (strand_1, strand_2):
                    if screen.check("".join(strand),
                                    max_homopolymer=self.max_homopolymer,
                                    max_content=self.max_content):
                        is_finish = True
                        dna_sequences.append(strand)
                        del partner_pool[selected_index]
                        break

                if is_finish:
                    break

            # additional information
            if not is_finish:
                dna_sequences.append(
                    self.addition(fixed_bit_segment, self.total_count))

            if self.need_logs:
                self.monitor.output(
                    self.total_count - (len(good_data) + len(bad_data)),
                    self.total_count)

        return dna_sequences