Ejemplo n.º 1
0
 def decodeFile(self, packet_len_format: str = "I", crc_len_format: str = "L",
                number_of_chunks_len_format: str = "I", degree_len_format: str = "I", seed_len_format: str = "I",
                last_chunk_len_format: str = "I") -> typing.Optional[int]:
     """
     Decode the packets that were saved in the single file ``self.file``.

     If the file name ends with "fasta" the file is re-opened in text mode and read two lines at a time
     (a header line followed by the DNA line); the DNA line is converted with ``tranlate_quat_to_byte``
     and parsed with ``self.parse_raw_packet``. Every other file is read packet by packet through
     ``self.getNextValidPacket``. Reading stops as soon as ``self.input_new_packet`` reports success or
     the input is exhausted.

     :param packet_len_format: forwarded to getNextValidPacket (not used for fasta input)
     :param crc_len_format: forwarded to the packet parser
     :param number_of_chunks_len_format: forwarded to the packet parser; replaced by "" when
                                         self.static_number_of_chunks is set
     :param degree_len_format: forwarded to the packet parser
     :param seed_len_format: forwarded to the packet parser
     :param last_chunk_len_format: forwarded to getNextValidPacket (not used for fasta input)
     :return: the result of ``self.GEPP.solve()`` if the system is potentially solvable, -1 if the end of
              the input was reached without decoding, otherwise None
     """
     decoded: bool = False
     self.EOF: bool = False
     if self.static_number_of_chunks is not None:
         self.number_of_chunks = self.static_number_of_chunks
         number_of_chunks_len_format = ""  # if we got static number_of_chunks we do not need it in struct string
     if self.file.lower().endswith("fasta"):
         self.f.close()
         self.f = open(self.file, "r")
         while not (decoded or self.EOF):
             # A record consists of a header line followed by the DNA line. The header content is not
             # needed for decoding, so the line is only consumed.
             if not self.f.readline():
                 self.EOF = True
                 break
             dna_line = self.f.readline()
             if not dna_line:
                 self.EOF = True
                 break
             dna_str = dna_line.replace("\n", "")
             new_pack = self.parse_raw_packet(BytesIO(tranlate_quat_to_byte(dna_str)).read(),
                                              crc_len_format=crc_len_format,
                                              number_of_chunks_len_format=number_of_chunks_len_format,
                                              degree_len_format=degree_len_format,
                                              seed_len_format=seed_len_format)
             decoded = self.input_new_packet(new_pack)
             if self.progress_bar is not None:
                 self.progress_bar.update(self.correct, Corrupt=self.corrupt)
     else:
         # This branch belongs to the "fasta" check above. Attached to the while loop it would only run
         # after that loop had already finished, so non-fasta files would never be read.
         while not (decoded or self.EOF):
             new_pack = self.getNextValidPacket(False, packet_len_format=packet_len_format,
                                                crc_len_format=crc_len_format,
                                                number_of_chunks_len_format=number_of_chunks_len_format,
                                                degree_len_format=degree_len_format,
                                                seed_len_format=seed_len_format,
                                                last_chunk_len_format=last_chunk_len_format)
             if new_pack is None:
                 break
             decoded = self.input_new_packet(new_pack)
     print("Decoded Packets: " + str(self.correct))
     print("Corrupt Packets : " + str(self.corrupt))
     self.f.close()
     if self.GEPP.isPotentionallySolvable():
         return self.GEPP.solve()
     if not decoded and self.EOF:
         print("Unable to retrieve File from Chunks. Too much errors?")
         return -1
     return None
Ejemplo n.º 2
0
def window_parse_packets():
    """
    Use a sliding window to find (correct) packets in the input sequences.

    For every record of the FASTA input, each window of PACKET_SEQ_LENGTH bases is first checked
    against RULES. Windows whose rule error is below RULES_DROP_LIMIT are handed to the decoder's
    parse_raw_packet. If a packet comes back, the window advances by a whole packet, otherwise by a
    single base. All accepted windows are additionally written to "correct_seqs.fasta".

    :return: list of the successfully parsed packets
    """
    decoder = create_decoder()
    fasta = load_fasta(INPUT_FILE)
    packets = []
    correct_seqs = []
    for k in fasta.keys():
        sequence = fasta[k]
        # Restart at the first base of every record. Carrying the offset over from the previous
        # record would skip the beginning (or all) of every record after the first one.
        window_start = 0
        while (window_start + PACKET_SEQ_LENGTH) <= len(sequence):
            line = sequence[window_start:window_start + PACKET_SEQ_LENGTH]
            new_pack = None
            # ensure the packet adheres to the rules
            if RULES.apply_all_rules(line) < RULES_DROP_LIMIT:
                new_pack = decoder.parse_raw_packet(
                    BytesIO(tranlate_quat_to_byte(line)).read(),
                    crc_len_format=CRC_LEN_FORMAT,
                    packet_len_format="",
                    number_of_chunks_len_format="",
                    id_len_format=ID_LEN_FORMAT)
                # TODO: one could convert the RS-repaired data back to dna and check if it adheres to all rules - this
                # would be less restrictive since packets that violate constraints might still be repairable
            if new_pack is not None and new_pack != "CORRUPT":
                # packet correct: keep it and move the window by PACKET_SEQ_LENGTH
                packets.append(new_pack)
                correct_seqs.append(line)
                window_start += PACKET_SEQ_LENGTH
            else:
                # rule violation or unrepairable packet: move the window by one base
                window_start += 1
    # (optional) write all correctly parsed sequences to a file
    # this will allow using the usual means of decoding the input (e.g. ConfigWorker)
    with open("correct_seqs.fasta", "w") as f:
        for i, seq in enumerate(correct_seqs):
            f.write(f">{i}\n{seq}\n")
    print(f"Correct sequences: {len(correct_seqs)}")
    return packets
Ejemplo n.º 3
0
def reconstruct_Packets(lst):
    """
    Parse every raw DNA entry of ``lst`` into a packet.

    The function relies on a ``decoder`` object that is defined outside of it.

    :param lst: iterable of (error_prob, seed, dna_str) tuples; only dna_str is used
    :return: list with one (decoder.removeAndXorAuxPackets(packet), packet.get_data()) tuple per entry
    """
    # NOTE: a local decoder setup (RU10Decoder with reed_solomon_decode and norepair_symbols = 3) used
    # to be disabled at this point; the decoder from the enclosing scope is used instead.
    results = []
    for index, (_error_prob, _seed, dna_str) in enumerate(lst):
        raw_bytes = BytesIO(tranlate_quat_to_byte(dna_str)).read()
        packet = decoder.parse_raw_packet(raw_bytes,
                                          crc_len_format="L",
                                          number_of_chunks_len_format="",
                                          packet_len_format="H",
                                          id_len_format="I")
        reduced = decoder.removeAndXorAuxPackets(packet)
        results.append((reduced, packet.get_data()))
        # progress output for every 100th entry
        if index % 100 == 0:
            print(str(index))
    return results
Ejemplo n.º 4
0
     static_number_of_chunks=STATIC_NUMBER_OF_CHUNKS)
 # Set the decoder's chunk count from the STATIC_NUMBER_OF_CHUNKS constant.
 decoder.number_of_chunks = STATIC_NUMBER_OF_CHUNKS
 # Collects one (error_prob, seed, dna_str) tuple per record read from in_file.
 raw_packet_list = []
 # Read in_file two lines at a time: a header line followed by the DNA line.
 while True:
     line = in_file.readline()
     if not line:
         break
     # Drop the first character of the header and split the rest at "_" into exactly two parts;
     # a header with a different number of "_" raises ValueError here.
     error_prob, seed = line[1:].replace("\n", "").split("_")
     line = in_file.readline()
     if not line:
         # header without a following DNA line: stop reading
         break
     dna_str = line.replace("\n", "")
     raw_packet_list.append((error_prob, seed, dna_str))
     if not PARALLEL:
         # Sequential mode: parse the packet and feed it to the decoder right away.
         packet = decoder.parse_raw_packet(BytesIO(
             tranlate_quat_to_byte(dna_str)).read(),
                                           crc_len_format="L",
                                           number_of_chunks_len_format="",
                                           packet_len_format="H",
                                           id_len_format="I")
         res = decoder.input_new_packet(packet)
         # Progress output whenever decoder.GEPP.n is a multiple of 100
         # (presumably the number of packets in the solver - TODO confirm).
         if decoder.GEPP.n % 100 == 0:
             print("Parsed packet " + str(seed) + " - " +
                   str(decoder.GEPP.n))
         if res:
             # input_new_packet returned a truthy result: save the decoded file and stop reading.
             decoder.saveDecodedFile(last_chunk_len_format="H",
                                     null_is_terminator=False,
                                     print_to_output=False)
             break
 if PARALLEL:
     norepair_symbols = 3
Ejemplo n.º 5
0
 def decodeFile(self,
                packet_len_format: str = "I",
                crc_len_format: str = "L",
                number_of_chunks_len_format: str = "I",
                id_len_format: str = "I"):
     """
     Decodes the information from a file if self.file represents a file and the packets were saved in a single file.

     Files whose name ends with "dna" are first converted with ``quat_file_to_bin``. Files whose name ends
     with "fasta" are re-opened in text mode and read two lines at a time (a header line followed by the
     DNA line). Every other file is read packet by packet through ``self.getNextValidPacket``. Reading
     stops as soon as ``self.input_new_packet`` reports success or the input is exhausted.

     :param packet_len_format: Format of the packet length
     :param crc_len_format:  Format of the crc length
     :param number_of_chunks_len_format: Format of the number of chunks length (replaced by "" when
                                         self.static_number_of_chunks is set)
     :param id_len_format: Format of the ID length
     :return: -1 if the end of the input was reached without decoding, otherwise None
     """
     decoded = False
     self.EOF = False
     if self.file.lower().endswith("dna"):
         try:
             self.f.close()
             self.f = quat_file_to_bin(self.file)
         except TypeError:
             # NOTE(review): self.f has already been closed at this point and is not replaced, yet the
             # read loop below still runs - confirm that getNextValidPacket copes with a closed file.
             print("skipping CORRUPT file - contains illegal character(s)")
             self.corrupt += 1
     if self.static_number_of_chunks is not None:
         self.number_of_chunks = self.static_number_of_chunks
         number_of_chunks_len_format = ""  # if we got static number_of_chunks we do not need it in struct string
     if self.file.lower().endswith("fasta"):
         self.f.close()
         self.f = open(self.file, "r")
         while not (decoded or self.EOF):
             # A record consists of a header line followed by the DNA line. The header content is not
             # needed for decoding, so the line is only consumed.
             if not self.f.readline():
                 self.EOF = True
                 break
             dna_line = self.f.readline()
             if not dna_line:
                 self.EOF = True
                 break
             dna_str = dna_line.replace("\n", "")
             new_pack = self.parse_raw_packet(
                 BytesIO(tranlate_quat_to_byte(dna_str)).read(),
                 crc_len_format=crc_len_format,
                 number_of_chunks_len_format=number_of_chunks_len_format,
                 packet_len_format=packet_len_format,
                 id_len_format=id_len_format)
             decoded = self.input_new_packet(new_pack)
     else:
         while not (decoded or self.EOF):
             new_pack = self.getNextValidPacket(
                 False,
                 packet_len_format=packet_len_format,
                 crc_len_format=crc_len_format,
                 number_of_chunks_len_format=number_of_chunks_len_format,
                 id_len_format=id_len_format)
             if new_pack is None:
                 break
             decoded = self.input_new_packet(new_pack)
     print("Decoded Packets: " + str(self.correct))
     print("Corrupt Packets : " + str(self.corrupt))
     if not decoded and self.EOF:
         print("Unable to retrieve File from Chunks. Too much errors?")
         return -1
     return None