def decodeFile(self, packet_len_format: str = "I", crc_len_format: str = "L",
               number_of_chunks_len_format: str = "I", degree_len_format: str = "I",
               seed_len_format: str = "I", last_chunk_len_format: str = "I") -> typing.Optional[int]:
    """
    Decode the packets stored in the single file ``self.file``.

    Files whose name ends with "fasta" are reopened as text and read two lines at a time (a header line
    followed by a DNA line). The DNA line is converted with ``tranlate_quat_to_byte`` and parsed by
    ``self.parse_raw_packet``. Every other file is read packet by packet through
    ``self.getNextValidPacket``. Reading stops as soon as ``self.input_new_packet`` returns a truthy
    value, the fasta input is exhausted, or ``getNextValidPacket`` returns None.

    :param packet_len_format: Format of the packet length (only forwarded for non-fasta input)
    :param crc_len_format: Format of the crc length
    :param number_of_chunks_len_format: Format of the number of chunks length; replaced by "" when
                                        ``self.static_number_of_chunks`` is set
    :param degree_len_format: Format of the degree length
    :param seed_len_format: Format of the seed length
    :param last_chunk_len_format: Format of the last chunk length (only forwarded for non-fasta input)
    :return: the result of ``self.GEPP.solve()`` if ``self.GEPP.isPotentionallySolvable()`` is true,
             otherwise -1 if the end of the input was reached without decoding, otherwise None
    """
    decoded: bool = False
    self.EOF = False
    if self.static_number_of_chunks is not None:
        self.number_of_chunks = self.static_number_of_chunks
        # if we got static number_of_chunks we do not need it in struct string
        number_of_chunks_len_format = ""
    if self.file.lower().endswith("fasta"):
        self.f.close()
        self.f = open(self.file, "r")
        while not (decoded or self.EOF):
            # A record is a header line followed by a DNA line. The header is only consumed: its
            # content is not needed to parse the packet, so it is no longer split and collected.
            header = self.f.readline()
            dna_line = self.f.readline() if header else ""
            if not dna_line:
                # input exhausted (or a trailing header without a DNA line)
                self.EOF = True
                break
            dna_str = dna_line.replace("\n", "")
            # NOTE(review): the parse result is forwarded unchecked - confirm that input_new_packet
            # copes with whatever parse_raw_packet returns for a rejected packet.
            new_pack = self.parse_raw_packet(BytesIO(tranlate_quat_to_byte(dna_str)).read(),
                                             crc_len_format=crc_len_format,
                                             number_of_chunks_len_format=number_of_chunks_len_format,
                                             degree_len_format=degree_len_format,
                                             seed_len_format=seed_len_format)
            decoded = self.input_new_packet(new_pack)
            if self.progress_bar is not None:
                self.progress_bar.update(self.correct, Corrupt=self.corrupt)
    else:
        while not (decoded or self.EOF):
            new_pack = self.getNextValidPacket(False, packet_len_format=packet_len_format,
                                               crc_len_format=crc_len_format,
                                               number_of_chunks_len_format=number_of_chunks_len_format,
                                               degree_len_format=degree_len_format,
                                               seed_len_format=seed_len_format,
                                               last_chunk_len_format=last_chunk_len_format)
            if new_pack is None:
                break
            # could be replaced by input_new_packet:
            # self.addPacket(new_pack)
            decoded = self.input_new_packet(new_pack)
    print("Decoded Packets: " + str(self.correct))
    print("Corrupt Packets : " + str(self.corrupt))
    self.f.close()
    if self.GEPP.isPotentionallySolvable():
        return self.GEPP.solve()
    if not decoded and self.EOF:
        print("Unable to retrieve File from Chunks. Too much errors?")
        return -1
def window_parse_packets():
    """
    Use a sliding window to find (correct) packets in the input sequences.

    Every record loaded from INPUT_FILE is scanned with a window of PACKET_SEQ_LENGTH bases. A window
    is handed to the decoder only if ``RULES.apply_all_rules`` scores it below RULES_DROP_LIMIT. When
    the decoder returns a packet (neither None nor "CORRUPT") the window jumps ahead by a full packet
    length, otherwise it advances by a single base.

    All accepted windows are written to "correct_seqs.fasta" and their count is printed.

    :return: list of the packets that were parsed successfully
    """
    decoder = create_decoder()
    fasta = load_fasta(INPUT_FILE)
    packets = []
    correct_seqs = []
    for k in fasta.keys():
        sequence = fasta[k]
        # Restart at the first base of every record. A single offset shared across records would make
        # each later record start wherever the previous one stopped (usually past its own end).
        window_start = 0
        while (window_start + PACKET_SEQ_LENGTH) <= len(sequence):
            line = sequence[window_start:(window_start + PACKET_SEQ_LENGTH)]
            new_pack = None
            # ensure the packet adheres to the rules before spending time on parsing it
            if RULES.apply_all_rules(line) < RULES_DROP_LIMIT:
                new_pack = decoder.parse_raw_packet(
                    BytesIO(tranlate_quat_to_byte(line)).read(),
                    crc_len_format=CRC_LEN_FORMAT,
                    packet_len_format="",
                    number_of_chunks_len_format="",
                    id_len_format=ID_LEN_FORMAT)
                # TODO: one could convert the RS-repaired data back to dna and check if it adheres to
                #  all rules - this would be less restrictive since packets that violate constraints
                #  might still be repairable
            if new_pack is not None and new_pack != "CORRUPT":
                # packet correct: remember it and move the window by PACKET_SEQ_LENGTH
                packets.append(new_pack)
                correct_seqs.append(line)
                window_start += PACKET_SEQ_LENGTH
            else:
                # rules violated or packet not repairable: move the window by one base
                window_start += 1
    # (optional) write all correctly parsed sequences to a file
    # this will allow using the usual means of decoding the input (e.g. ConfigWorker)
    with open("correct_seqs.fasta", "w") as f:
        for i, seq in enumerate(correct_seqs):
            f.write(f">{i}\n{seq}\n")
    print(f"Correct sequences: {len(packets)}")
    return packets
def reconstruct_Packets(lst):
    """
    Parse every DNA string in ``lst`` into a packet and pair its processed form with its payload.

    The function relies on a ``decoder`` object that is not created here; it has to be available in the
    surrounding scope. Progress is printed for every 100th entry (starting with entry 0).

    :param lst: iterable of (error_prob, seed, dna_str) tuples; only dna_str is used
    :return: list of (decoder.removeAndXorAuxPackets(packet), packet.get_data()) tuples in input order
    """
    # Disabled local decoder setup, kept for reference:
    # norepair_symbols = 3
    # decoder = RU10Decoder(file=None, error_correction=lambda x: reed_solomon_decode(x, norepair_symbols),
    #                       use_headerchunk=False, static_number_of_chunks=STATIC_NUMBER_OF_CHUNKS)
    # decoder.number_of_chunks = STATIC_NUMBER_OF_CHUNKS
    results = []
    for index, (_error_prob, _seed, dna_str) in enumerate(lst):
        raw_bytes = BytesIO(tranlate_quat_to_byte(dna_str)).read()
        packet = decoder.parse_raw_packet(raw_bytes,
                                          crc_len_format="L",
                                          number_of_chunks_len_format="",
                                          packet_len_format="H",
                                          id_len_format="I")
        processed = decoder.removeAndXorAuxPackets(packet)
        results.append((processed, packet.get_data()))
        if index % 100 == 0:
            print(str(index))
    return results
static_number_of_chunks=STATIC_NUMBER_OF_CHUNKS) decoder.number_of_chunks = STATIC_NUMBER_OF_CHUNKS raw_packet_list = [] while True: line = in_file.readline() if not line: break error_prob, seed = line[1:].replace("\n", "").split("_") line = in_file.readline() if not line: break dna_str = line.replace("\n", "") raw_packet_list.append((error_prob, seed, dna_str)) if not PARALLEL: packet = decoder.parse_raw_packet(BytesIO( tranlate_quat_to_byte(dna_str)).read(), crc_len_format="L", number_of_chunks_len_format="", packet_len_format="H", id_len_format="I") res = decoder.input_new_packet(packet) if decoder.GEPP.n % 100 == 0: print("Parsed packet " + str(seed) + " - " + str(decoder.GEPP.n)) if res: decoder.saveDecodedFile(last_chunk_len_format="H", null_is_terminator=False, print_to_output=False) break if PARALLEL: norepair_symbols = 3
def decodeFile(self, packet_len_format: str = "I", crc_len_format: str = "L",
               number_of_chunks_len_format: str = "I", id_len_format: str = "I"):
    """
    Decodes the information from a file if self.file represents a file and the packets were saved in a
    single file.

    Files ending in "dna" are first converted with ``quat_file_to_bin``. Files ending in "fasta" are
    reopened as text and read two lines at a time (a header line followed by a DNA line); the DNA line
    is converted with ``tranlate_quat_to_byte`` and parsed by ``self.parse_raw_packet``. Every other
    file is read packet by packet through ``self.getNextValidPacket``. Reading stops as soon as
    ``self.input_new_packet`` returns a truthy value, the fasta input is exhausted, or
    ``getNextValidPacket`` returns None.

    :param packet_len_format: Format of the packet length
    :param crc_len_format: Format of the crc length
    :param number_of_chunks_len_format: Format of the number of chunks length; replaced by "" when
                                        ``self.static_number_of_chunks`` is set
    :param id_len_format: Format of the ID length
    :return: -1 if the end of the input was reached without decoding, otherwise None
    """
    decoded = False
    self.EOF = False
    file_name = self.file.lower()
    if file_name.endswith("dna"):
        try:
            self.f.close()
            self.f = quat_file_to_bin(self.file)
        except TypeError:
            print("skipping CORRUPT file - contains illegal character(s)")
            self.corrupt += 1
            # NOTE(review): decoding continues below although self.f has most likely been closed
            # above - confirm getNextValidPacket copes with that, or bail out here.
    if self.static_number_of_chunks is not None:
        self.number_of_chunks = self.static_number_of_chunks
        # if we got static number_of_chunks we do not need it in struct string
        number_of_chunks_len_format = ""
    if file_name.endswith("fasta"):
        self.f.close()
        self.f = open(self.file, "r")
        while not (decoded or self.EOF):
            # A record is a header line followed by a DNA line. The header is only consumed: its
            # content is not needed to parse the packet, so it is no longer split and collected.
            header = self.f.readline()
            dna_line = self.f.readline() if header else ""
            if not dna_line:
                # input exhausted (or a trailing header without a DNA line)
                self.EOF = True
                break
            dna_str = dna_line.replace("\n", "")
            # NOTE(review): the parse result is forwarded unchecked - confirm that input_new_packet
            # copes with whatever parse_raw_packet returns for a rejected packet.
            new_pack = self.parse_raw_packet(
                BytesIO(tranlate_quat_to_byte(dna_str)).read(),
                crc_len_format=crc_len_format,
                number_of_chunks_len_format=number_of_chunks_len_format,
                packet_len_format=packet_len_format,
                id_len_format=id_len_format)
            decoded = self.input_new_packet(new_pack)
    else:
        while not (decoded or self.EOF):
            new_pack = self.getNextValidPacket(
                False,
                packet_len_format=packet_len_format,
                crc_len_format=crc_len_format,
                number_of_chunks_len_format=number_of_chunks_len_format,
                id_len_format=id_len_format)
            if new_pack is None:
                break
            decoded = self.input_new_packet(new_pack)
    print("Decoded Packets: " + str(self.correct))
    print("Corrupt Packets : " + str(self.corrupt))
    if not decoded and self.EOF:
        print("Unable to retrieve File from Chunks. Too much errors?")
        return -1