def test_suite5(as_dna, chunk_size, dna_rules, error_correction, headerchunk, decoder_instance):
    """
    Full encode/save/decode round trip for ``file2`` with a configurable header chunk.

    Encodes ``file2`` with a RU10 encoder (verified on the fly by a pseudo decoder),
    saves the packets to ``out_dir2`` (optionally as DNA), decodes them again and
    compares the printed output file against ``cmp_file2``.

    :param as_dna: If True, packets are saved and decoded as DNA.
    :param chunk_size: Chunk size used to split the file.
    :param dna_rules: DNA rules applied while encoding (only if ``as_dna``).
    :param error_correction: Error correction used for encoding AND decoding.
    :param headerchunk: Whether a header chunk is inserted / expected.
    :param decoder_instance: Decoder class to use.
    """
    chunksize = chunk_size
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file2, chunksize)
    dist = RaptorDistribution(number_of_chunks)
    pseudo_decoder = decoder_instance.pseudo_decoder(number_of_chunks=number_of_chunks)
    rules = dna_rules if as_dna else None
    encoder = RU10Encoder(file2, number_of_chunks, dist, pseudo_decoder=pseudo_decoder, rules=rules,
                          error_correction=error_correction, insert_header=headerchunk)
    # Fix: encode exactly once. The original called encode_to_packets() twice in a row,
    # generating a redundant second batch of packets before saving.
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    assert (pseudo_decoder.is_decoded()
            and pseudo_decoder.getSolvedCount() == pseudo_decoder.number_of_chunks)
    assert os.path.exists(out_dir2)
    decoder = decoder_instance(out_dir2, use_headerchunk=headerchunk, error_correction=error_correction)
    decoder.decode()
    assert decoder.is_decoded() and decoder.getSolvedCount() == encoder.number_of_chunks
    os.remove(file2)
    decoder.saveDecodedFile(print_to_output=True, null_is_terminator=True)
    # print_to_output writes to a 'DEC_RU10_' prefixed file rather than restoring file2
    assert os.path.exists('DEC_RU10_' + file2) and filecmp.cmp('DEC_RU10_' + file2, cmp_file2)
    shutil.rmtree(out_dir2)
def test_suite2(as_dna, chunk_size, dna_rules, error_correction_pair):
    """
    Encode/save/decode round trip using an (encode, decode) error-correction pair.

    Encodes ``file`` with a RU10 encoder (checked on the fly by a pseudo decoder),
    saves the packets to ``out_dir`` (optionally as DNA), decodes them with the
    matching error correction and compares the restored file to ``cmp_file``.

    :param as_dna: If True, packets are saved and decoded as DNA.
    :param chunk_size: Chunk size used to split the file.
    :param dna_rules: DNA rules applied while encoding (only if ``as_dna``).
    :param error_correction_pair: Tuple of (encoder-side, decoder-side) error correction.
    """
    num_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    distribution = RaptorDistribution(num_chunks)
    decoder_cls = RU10Decoder
    pseudo = decoder_cls.pseudo_decoder(number_of_chunks=num_chunks)
    encoder = RU10Encoder(
        file, num_chunks, distribution,
        pseudo_decoder=pseudo,
        rules=dna_rules if as_dna else None,
        error_correction=error_correction_pair[0],
    )
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    # the pseudo decoder must already have solved everything during encoding
    assert pseudo.is_decoded()
    assert pseudo.getSolvedCount() == pseudo.number_of_chunks
    assert os.path.exists(out_dir)
    decoder = decoder_cls(out_dir, error_correction=error_correction_pair[1])
    decoder.decode()
    assert decoder.is_decoded()
    assert decoder.getSolvedCount() == encoder.number_of_chunks
    os.remove(file)
    decoder.saveDecodedFile(print_to_output=False)
    assert os.path.exists(file) and filecmp.cmp(file, cmp_file)
    shutil.rmtree(out_dir)
def encode(file, dist_lst, asdna=True, chunk_size=50):
    """
    Encode ``file`` with a RU10 encoder using the given distribution list and
    collect, per packet degree, the DNA error probability of every packet.

    A pseudo decoder is fed until it first reports success, at which point the
    number of packets generated so far is remembered; generation then continues
    until ``number_of_chunks * 50`` packets exist in total.

    :param file: Path of the file to encode.
    :param dist_lst: Distribution values assigned to the RaptorDistribution's ``f``.
    :param asdna: If True, apply FastDNARules during encoding.
    :param chunk_size: Chunk size used to split the file.
    :return: Tuple of (dict: degree -> list of error probabilities,
             (packets needed until decoded - number_of_chunks) / 100.0).
    """
    packets_needed = 0
    packets = dict()
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    dist.f = dist_lst
    # degree values 0..40 (idiomatic replacement for the 41-element literal list);
    # the same list object is shared with the pseudo decoder below, as before
    d = list(range(0, 41))
    dist.d = d
    dna_rules = FastDNARules()
    rules = dna_rules if asdna else None
    encoder = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=False,
                          rules=rules, error_correction=nocode, id_len_format="H",
                          number_of_chunks_len_format="B", save_number_of_chunks_in_packet=False,
                          mode_1_bmp=False)
    encoder.prepare()
    pseudo = RU10Decoder.pseudo_decoder(encoder.number_of_chunks, False)
    if pseudo.distribution is None:
        # pseudo decoders start without a distribution; mirror the encoder's setup
        pseudo.distribution = RaptorDistribution(encoder.number_of_chunks)
        pseudo.distribution.f = dist_lst
        pseudo.distribution.d = d
        pseudo.number_of_chunks = encoder.number_of_chunks
        _, pseudo.s, pseudo.h = intermediate_symbols(encoder.number_of_chunks, pseudo.distribution)
        pseudo.createAuxBlocks()
    n = 0
    for degree in range(45):
        packets[degree] = list()
    while n < number_of_chunks * 50:
        pack = encoder.create_new_packet()
        if packets_needed == 0:
            pseudo.input_new_packet(pack)
        # should_drop_packet evaluates the DNA rules and sets pack.error_prob
        should_drop_packet(dna_rules, pack)
        if pack.get_degree() not in packets:
            packets[pack.get_degree()] = list()
        packets[pack.get_degree()].append(pack.error_prob)
        n += 1
        if n >= number_of_chunks and pseudo.is_decoded() and packets_needed == 0:
            packets_needed = n
            # we dont want to break, we want to generate #chunks * 50 packets!
    print("Packets created: " + str(sum(len(lst) for lst in packets.values())))
    # NOTE(review): if the pseudo decoder never decodes, packets_needed stays 0 and
    # the returned overhead is negative — TODO confirm callers handle that case.
    return packets, (packets_needed - number_of_chunks) / 100.0
def encode(file, error_correction=nocode, insert_header=INSERT_HEADER,
           save_number_of_chunks=NUMBER_OF_CHUNKS_IN_PACKET, save_as_fasta=True,
           save_as_zip=True, overhead=5.0, upper_bound=1.0):
    """
    Encode ``file`` with an LT encoder (RobustSolitonDistribution) and save the packets.

    :param file: Path of the file to encode.
    :param error_correction: Error correction applied to every packet.
    :param insert_header: Whether to insert a header chunk.
    :param save_number_of_chunks: Store the chunk count inside each packet.
    :param save_as_fasta: If True, save the packets as FASTA (takes precedence).
    :param save_as_zip: If True (and not FASTA), save the packets as a zip archive.
    :param overhead: Overhead limit passed to the encoder.
    :param upper_bound: Drop threshold passed to the encoder.
    :return: The configured LTEncoder after encoding and saving.
    """
    num_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
        file, chunk_size=CHUNK_SIZE, insert_header=insert_header)
    print("Number of Chunks=%s" % num_chunks)
    # Alternative distributions that were tried previously:
    # dist = ErlichZielinskiRobustSolitonDistribution(num_chunks, seed=2)
    # dist = IdealSolitonDistribution(num_chunks, seed=2)
    distribution = RobustSolitonDistribution(num_chunks, seed=2)
    lt_encoder = LTEncoder(
        file, num_chunks, distribution,
        insert_header=insert_header,
        rules=DNARules_ErlichZielinski(),
        error_correction=error_correction,
        number_of_chunks_len_format="H",
        id_len_format="H",
        used_packets_len_format="H",
        save_number_of_chunks_in_packet=save_number_of_chunks,
        implicit_mode=IMPLICIT_MODE,
        drop_upper_bound=upper_bound,
    )
    lt_encoder.set_overhead_limit(overhead)
    lt_encoder.encode_to_packets()
    if save_as_fasta:
        lt_encoder.save_packets_fasta(file_ending="_LT", seed_is_filename=True)
    elif save_as_zip:
        lt_encoder.save_packets_zip(save_as_dna=True, file_ending="_LT", seed_is_filename=True)
    else:
        lt_encoder.save_packets(True, save_as_dna=True, seed_is_filename=True, clear_output=True)
    # NOTE(review): this unconditional save runs in addition to whichever branch was
    # taken above, writing the packets a second time as split files — looks like a
    # leftover; confirm before removing (kept here to preserve behavior).
    lt_encoder.save_packets(split_to_multiple_files=True, save_as_dna=True)
    print("Number of Chunks=%s" % lt_encoder.number_of_chunks)
    return lt_encoder
def encode(self, file, asdna=True, error_correction=nocode, insert_header=False,
           save_number_of_chunks_in_packet=False, mode_1_bmp=False, chunk_size=50):
    """
    Encode ``file`` with a RU10 encoder whose distribution comes from ``self.X`` /
    ``self.d`` and collect, per packet degree, the error probability of every packet.

    A pseudo decoder is fed until it first reports success (the packet count at that
    point is remembered); generation continues until ``number_of_chunks * 50`` packets
    exist in total.

    :param file: Path of the file to encode.
    :param asdna: If True, apply FastDNARules during encoding.
    :param error_correction: Error correction wrapper for the packets.
    :param insert_header: Whether a header chunk is inserted.
    :param save_number_of_chunks_in_packet: Store the chunk count in each packet.
    :param mode_1_bmp: Passed through to the encoder.
    :param chunk_size: Chunk size used to split the file.
    :return: Tuple of (dict: degree -> list of error probabilities,
             (packets needed until decoded - number_of_chunks) / 100.0).
    """
    needed = 0
    degree_probs = dict()
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    distribution = RaptorDistribution(number_of_chunks)
    distribution.f = self.X
    distribution.d = self.d
    dna_rules = FastDNARules()
    encoder = RU10Encoder(file, number_of_chunks, distribution, chunk_size=chunk_size,
                          insert_header=insert_header, rules=dna_rules if asdna else None,
                          error_correction=error_correction, id_len_format="H",
                          number_of_chunks_len_format="B",
                          save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
                          mode_1_bmp=mode_1_bmp)
    encoder.prepare()
    pseudo = RU10Decoder.pseudo_decoder(encoder.number_of_chunks, False)
    if pseudo.distribution is None:
        # pseudo decoders start without a distribution; mirror the encoder's setup
        pseudo.distribution = RaptorDistribution(encoder.number_of_chunks)
        pseudo.distribution.f = self.X
        pseudo.distribution.d = self.d
        pseudo.number_of_chunks = encoder.number_of_chunks
        _, pseudo.s, pseudo.h = intermediate_symbols(encoder.number_of_chunks, pseudo.distribution)
        pseudo.createAuxBlocks()
    # pre-populate the degree buckets 0..44
    for degree in range(45):
        degree_probs[degree] = list()
    generated = 0
    while generated < number_of_chunks * 50:
        pack = encoder.create_new_packet()
        if needed == 0:
            pseudo.input_new_packet(pack)
        # evaluates the DNA rules and sets pack.error_prob
        should_drop_packet(dna_rules, pack)
        degree_probs.setdefault(pack.get_degree(), list()).append(pack.error_prob)
        generated += 1
        if generated >= number_of_chunks and pseudo.is_decoded() and needed == 0:
            # remember how many packets were required, but keep generating:
            # we want number_of_chunks * 50 packets in total
            needed = generated
    print("Packets created: " + str(sum([len(x) for x in degree_probs.values()])))
    return degree_probs, (needed - number_of_chunks) / 100.0
def encode(file, chunk_size, dist, as_dna=True, repeats=15):
    """
    Encodes the file to packets until the pseudo decoder was able to decode it
    'repeats' times with the given chunk size and the distribution list.

    :param file: File to encode.
    :param chunk_size: Chunksize to use.
    :param dist: The distribution to calculate the average error and overhead for.
    :param as_dna: If true uses the DNA Rules.
    :param repeats: Number of En-/Decoding cycles.
    :return: Tuple of (average overhead over all cycles,
             dict: degree -> list of capped error probabilities).
    """
    degree_to_probs = {}
    overheads = []
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size,
                                                                             insert_header=False)
    distribution = RaptorDistribution(number_of_chunks)
    distribution.f = dist
    distribution.d = [deg for deg in range(0, 41)]
    rules = FastDNARules() if as_dna else None
    encoder = RU10Encoder(file, number_of_chunks, distribution, insert_header=False, rules=rules,
                          error_correction=nocode, id_len_format="H",
                          number_of_chunks_len_format="B", save_number_of_chunks_in_packet=False,
                          mode_1_bmp=False)
    encoder.prepare()
    for _ in range(0, repeats):
        # draw a fresh master seed for every encode/decode cycle
        encoder.random_state = np.random.RandomState()
        pseudo = create_pseudo_decoder(encoder.number_of_chunks, distribution)
        generated = 0
        # keep producing packets until the pseudo decoder reports success
        while pseudo.GEPP is None or not pseudo.is_decoded():
            generated += 1
            packet = encoder.create_new_packet()
            pseudo.input_new_packet(packet)
            # evaluates the rules and sets packet.error_prob
            should_drop_packet(rules, packet)
            degree_to_probs.setdefault(packet.get_degree(), list()).append(
                min(packet.error_prob, 1.0))
        overheads.append((generated - encoder.number_of_chunks) / 100.0)
    return sum(overheads) / len(overheads), degree_to_probs
def test_suite4(as_dna, chunk_size, dna_rules, error_correction):
    """
    Corruption test: encode ``file``, delete most saved packets, truncate the
    remaining first packet and verify the decoder detects the corruption and
    refuses to produce (full) output.

    :param as_dna: If True, packets are saved and decoded as DNA.
    :param chunk_size: Chunk size used to split the file.
    :param dna_rules: DNA rules applied while encoding (only if ``as_dna``).
    :param error_correction: Tuple of (encoder-side, decoder-side) error correction.
    """
    num_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    distribution = RaptorDistribution(num_chunks)
    decoder_cls = RU10Decoder
    pseudo = decoder_cls.pseudo_decoder(number_of_chunks=num_chunks)
    encoder = RU10Encoder(file, num_chunks, distribution, pseudo_decoder=pseudo,
                          rules=dna_rules if as_dna else None,
                          error_correction=error_correction[0])
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    assert pseudo.is_decoded()
    assert pseudo.getSolvedCount() == pseudo.number_of_chunks
    assert os.path.exists(out_dir)
    # do not delete all packets (and break the last one).
    # that way the GEPP inside the decoder will get initialized and we might not end in a
    # race-condition for decoder.decode() sometimes raising an Exception..
    for idx in range(2, num_chunks):
        os.remove("RU10_" + file + "/" + str(idx) + ".RU10_DNA")
    # TODO we should flip bits in the middle rather than deleting 4 bytes at the end
    # (we store crc32 / reedsolomon at the end)
    with open("RU10_" + file + "/0.RU10_DNA", 'rb+') as packet_file:
        packet_file.seek(-4, os.SEEK_END)
        packet_file.truncate()
    decoder = decoder_cls(out_dir, error_correction=error_correction[1])
    decoder.decode()
    assert decoder.corrupt == 1
    assert not decoder.is_decoded()
    os.remove(file)
    # a full (non-partial) save must fail on corrupt input
    with pytest.raises(AssertionError):
        decoder.saveDecodedFile(print_to_output=False, partial_decoding=False)
    assert not (os.path.exists(file) and filecmp.cmp(file, cmp_file))
    shutil.rmtree(out_dir)
def test_suite(as_dna, decoder_instance):
    """
    Round trip with header chunk: copy a fresh input file, encode it, save the
    packets (optionally as DNA), decode them from the output folder and compare
    the restored file against ``cmp_file``.

    :param as_dna: If True, packets are saved and decoded as DNA.
    :param decoder_instance: Decoder class to use (e.g. RU10Decoder / RU10BPDecoder).
    """
    dir_path = os.getcwd()
    try:
        os.remove(dir_path + "/" + file)
    except FileNotFoundError:
        # Fix: the original used a bare `except:` which would also swallow
        # KeyboardInterrupt, permission errors etc.; only a missing file is expected here.
        print("Not deleting, File did not exists")
    shutil.copyfile(dir_path + "/" + cmp_file, dir_path + "/" + file)
    print(as_dna)
    chunksize = 200
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunksize)
    dist = RaptorDistribution(number_of_chunks)
    pseudo_decoder = decoder_instance.pseudo_decoder(number_of_chunks=number_of_chunks)
    rules = FastDNARules() if as_dna else None
    encoder = RU10Encoder(file, number_of_chunks, dist, pseudo_decoder=pseudo_decoder, rules=rules,
                          id_len_format="H", number_of_chunks_len_format="H", insert_header=True)
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    assert (pseudo_decoder.is_decoded()
            and pseudo_decoder.getSolvedCount() == pseudo_decoder.number_of_chunks)
    assert os.path.exists(out_dir)
    decoder = decoder_instance(out_dir)
    decoder.decodeFolder(id_len_format="H", number_of_chunks_len_format="H")
    if isinstance(decoder, RU10BPDecoder):
        # BP decoders additionally get every encoded packet fed in directly
        # (presumably folder decoding alone is not sufficient for BP — confirm)
        for pack in encoder.encodedPackets:
            decoder.input_new_packet(pack)
    assert decoder.is_decoded() and decoder.getSolvedCount() == encoder.number_of_chunks
    os.remove(file)
    decoder.saveDecodedFile(print_to_output=False)
    assert os.path.exists(file) and filecmp.cmp(file, cmp_file)
    shutil.rmtree(out_dir)
index.append(key) data.append(val) fig, (ax) = plt.subplots(ncols=1) ax.boxplot(data) ax.set_xticklabels(index) plt.show() plt.plot(num_list) plt.plot(mean_list) plt.show() if __name__ == "__main__": file = "../.INFILES/Dorn" chunk_size = 100 norepairsymbols = 6 save_number_of_chunks_in_packet = False insert_header = False rules = FastDNARules() error_correction = lambda x: reed_solomon_encode(x, norepairsymbols) number_of_chunks = 50 if chunk_size != 0: number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size) dist = RaptorDistribution(number_of_chunks) x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=insert_header, rules=rules, error_correction=error_correction, id_len_format="H", number_of_chunks_len_format="B", save_number_of_chunks_in_packet=save_number_of_chunks_in_packet) aa = QualityPacketGen(x)