Exemple #1
0
def create_fasta_from_fastq(fastq_filename: str, out_filename: str = "R:/out.fasta"):
    """Extract sequence slices from a FASTQ file and write rule-passing ones as FASTA.

    Reads every 4th line starting at line 2 (the sequence line of each FASTQ
    record), slices characters 18:182, keeps slices whose combined DNA-rule
    error probability is below 1.0 and writes them as FASTA entries.

    :param fastq_filename: path of the FASTQ file to read.
    :param out_filename: path of the FASTA file to write (default keeps the
        previously hard-coded value for backward compatibility).
    :return: the list of sequence slices that passed the rules.
    """
    out_lines = []
    rules = FastDNARules()
    with open(fastq_filename, "r") as in_file:
        # [1:][::4] selects lines 2, 6, 10, ... : the sequence line of each FASTQ record
        lines = in_file.readlines()[1:][::4]
    for line in lines:
        seq = line[18:182]
        err = rules.apply_all_rules(seq)
        if err < 1.0:
            out_lines.append(seq)
    with open(out_filename, "w") as out_file:
        for line in out_lines:
            out_file.write(">todo\n")
            out_file.write(line + "\n")
    # BUGFIX: previously returned the unfiltered `lines` although the function
    # computes and writes the filtered `out_lines`.
    return out_lines
Exemple #2
0
def main(file,
         number_of_chunks: int = 0,
         chunk_size: int = 0,
         error_correction: typing.Callable = nocode,
         as_dna: bool = False,
         insert_header: bool = False,
         save_number_of_chunks=False):
    """LT-encode *file* and save the generated packets to disk.

    A non-zero chunk_size takes precedence and recomputes number_of_chunks.

    :param file: path of the file to encode.
    :param number_of_chunks: number of chunks (ignored if chunk_size != 0).
    :param chunk_size: chunk size; if non-zero, overrides number_of_chunks.
    :param error_correction: error-correction wrapper applied to each packet.
    :param as_dna: if True, apply DNA rules and save packets as DNA.
    :param insert_header: if True, insert a header chunk.
    :param save_number_of_chunks: if True, store #chunks in every packet.
    """
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
            file, chunk_size)
    rules = FastDNARules() if as_dna else None
    dist = ErlichZielinskiRobustSolitonDistribution(number_of_chunks, seed=2)
    lt_encoder = LTEncoder(file,
                           number_of_chunks,
                           dist,
                           insert_header=insert_header,
                           rules=rules,
                           error_correction=error_correction,
                           number_of_chunks_len_format="H",
                           id_len_format="I",
                           used_packets_len_format="H",
                           save_number_of_chunks_in_packet=save_number_of_chunks,
                           implicit_mode=False)
    lt_encoder.encode_to_packets()
    print("Number of Chunks=%s" % lt_encoder.number_of_chunks)
    lt_encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
def encode(p_output,
           file,
           as_dna=True,
           error_correction=nocode,
           insert_header=False,
           save_number_of_chunks_in_packet=False,
           overhead=6.0,
           clear_output=False):
    """RU10-encode *file* and stream the packets through the pipe *p_output*.

    Sends one list of ParallelPackets, then the sentinel string "DONE", and
    finally closes the sending end of the pipe.

    :param p_output: pipe connection with send()/close().
    :param file: path of the file to encode.
    :param as_dna: if True, apply the DNA rules while encoding.
    :param error_correction: error-correction wrapper applied to each packet.
    :param insert_header: if True, insert a header chunk.
    :param save_number_of_chunks_in_packet: store #chunks in every packet.
    :param overhead: overhead limit handed to the encoder.
    :param clear_output: unused; kept for interface compatibility.
    :return: 0 on completion.
    """
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
        file, CHUNK_SIZE)
    dist = RaptorDistribution(number_of_chunks)
    dna_rules = FastDNARules()
    rules = dna_rules if as_dna else None
    encoder = RU10Encoder(file,
                          number_of_chunks,
                          dist,
                          chunk_size=CHUNK_SIZE,
                          insert_header=insert_header,
                          rules=rules,
                          error_correction=error_correction,
                          id_len_format="H",
                          number_of_chunks_len_format="B",
                          save_number_of_chunks_in_packet=save_number_of_chunks_in_packet)
    encoder.set_overhead_limit(overhead)
    encoder.encode_to_packets()
    parallel_packets = [ParallelPacket.from_packet(p) for p in encoder.encodedPackets]
    p_output.send(parallel_packets)
    p_output.send("DONE")
    p_output.close()
    return 0
Exemple #4
0
def main(file: str,
         error_correction: typing.Callable[[typing.Any], typing.Any],
         asdna: bool = True,
         epsilon: float = 0.06,
         insert_header: bool = False):
    """Encode *file* with an Online code and save the packets (optionally as DNA).

    :param file: path of the file to encode.
    :param error_correction: error-correction wrapper applied to each packet.
    :param asdna: if True, apply DNA rules and save packets as DNA.
    :param epsilon: epsilon of the Online distribution (determines #chunks).
    :param insert_header: if True, insert a header chunk.
    """
    dist = OnlineDistribution(epsilon)
    # the Online distribution dictates the number of chunks
    number_of_chunks = dist.get_size()
    quality = 7
    if asdna:
        rules = FastDNARules()
    else:
        rules = None
    encoder = OnlineEncoder(file,
                            number_of_chunks,
                            dist,
                            epsilon,
                            quality,
                            error_correction=error_correction,
                            quality_len_format="B",
                            insert_header=insert_header,
                            check_block_number_len_format="H",
                            number_of_chunks_len_format="H",
                            rules=rules,
                            save_number_of_chunks_in_packet=False)
    encoder.set_overhead_limit(1.70)
    # NOTE(review): both encode_file and save_packets are invoked, which looks
    # like the output is written twice — confirm this is intentional.
    encoder.encode_file(split_to_multiple_files=True, save_as_dna=asdna)
    encoder.save_packets(True, save_as_dna=asdna)
    # NOTE(review): the nested function below is defined after main()'s work and
    # is never called inside main() — it appears to be dead code from a
    # merge/scrape; preserved as-is.
    def encode(file, asdna=True,  error_correction=nocode, insert_header=False, save_number_of_chunks_in_packet=False,
               save_as_fasta=True, save_as_zip=True, overhead=0.40, epsilon=0.068, quality=7, upper_bound=1.0):
        dist = OnlineDistribution(epsilon)
        number_of_chunks = dist.get_size()
        dna_rules = FastDNARules()
        if asdna:
            rules = dna_rules
        else:
            rules = None
        encoder = OnlineEncoder(
            file, number_of_chunks, dist, epsilon, quality, error_correction=error_correction, quality_len_format="B",
            insert_header=insert_header, check_block_number_len_format="H", number_of_chunks_len_format="H", rules=rules,
            save_number_of_chunks_in_packet=save_number_of_chunks_in_packet, drop_upper_bound=upper_bound)  # , pseudo_decoder=pseudo)
        encoder.set_overhead_limit(overhead)
        #encoder.encode_file(split_to_multiple_files=True, save_as_dna=asdna)
        encoder.encode_to_packets()
        # choose the output container: FASTA, ZIP, or one file per packet
        if save_as_fasta:
            encoder.save_packets_fasta(file_ending="_Online", seed_is_filename=True)
        elif save_as_zip:
            encoder.save_packets_zip(save_as_dna=True, file_ending="_Online", seed_is_filename=True)
        else:
            encoder.save_packets(True, save_as_dna=True, seed_is_filename=True, clear_output=True)

        encoder.save_packets(split_to_multiple_files=True, save_as_dna=True)
        print("Number of Chunks=%s" % encoder.number_of_chunks)
        return encoder
Exemple #6
0
def get_error_sum(file,
                  number_of_chunks,
                  chunk_size,
                  seq_seed=None,
                  while_count=1000,
                  save_number_of_chunks_in_packet=False):
    """Create up to *while_count* RU10 packets and collect their error probabilities.

    :param file: path of the file to encode.
    :param number_of_chunks: number of chunks to split the file into.
    :param chunk_size: chunk size handed to the encoder.
    :param seq_seed: if set, packets use sequential seeds starting here and
        generation stops once the seed space (2^(8*sizeof(SEED_LEN_FORMAT)))
        is exhausted; otherwise random seeds are used.
    :param while_count: maximum number of packets to create.
    :param save_number_of_chunks_in_packet: store #chunks in every packet.
        BUGFIX: this name was previously referenced but never defined in this
        scope (NameError unless a module global existed); now a parameter.
    :return: list of per-packet error probabilities.
    """
    max_seed = np.power(2, 8 * struct.calcsize(SEED_LEN_FORMAT))
    dist = RaptorDistribution(number_of_chunks)
    dna_rules = FastDNARules()
    error_correction = lambda x: reed_solomon_encode(x, NO_REPAIR_SYMBOLS)
    encoder = RU10Encoder(
        file,
        number_of_chunks,
        dist,
        chunk_size=chunk_size,
        insert_header=INSERT_HEADER,
        rules=dna_rules,
        error_correction=error_correction,
        id_len_format=SEED_LEN_FORMAT,
        number_of_chunks_len_format=NUMBER_OF_CHUNKS_LEN_FORMAT,
        save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
        prepend="",
        append="")
    encoder.prepare()
    i = 0
    res = []
    while i < while_count:
        if seq_seed is not None:
            if seq_seed + i >= max_seed:
                break
            packet = encoder.create_new_packet(seed=seq_seed + i)
        else:
            packet = encoder.create_new_packet()
        should_drop_packet(dna_rules, packet)
        res.append(packet.error_prob)
        i += 1  # BUGFIX: `i` was never incremented, making this an infinite loop
    return res
    # NOTE(review): the nested function below sits after the return statement
    # and is therefore unreachable dead code (likely a scrape artifact);
    # preserved unchanged.
    def encode(file,
               asdna=True,
               chunk_size=DEFAULT_CHUNK_SIZE,
               error_correction=nocode,
               insert_header=False,
               save_number_of_chunks_in_packet=False,
               mode_1_bmp=False,
               prepend="",
               append="",
               upper_bound=0.5,
               save_as_fasta=True,
               save_as_zip=True,
               overhead=0.40):
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
            file, chunk_size)
        dist = RaptorDistribution(number_of_chunks)
        if asdna:
            rules = FastDNARules()
        else:
            rules = None
        x = RU10Encoder(
            file,
            number_of_chunks,
            dist,
            insert_header=insert_header,
            pseudo_decoder=None,
            chunk_size=0,
            rules=rules,
            error_correction=error_correction,
            packet_len_format=PACKET_LEN_FORMAT,
            crc_len_format=CRC_LEN_FORMAT,
            number_of_chunks_len_format=NUMBER_OF_CHUNKS_LEN_FORMAT,
            id_len_format=ID_LEN_FORMAT,
            save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
            mode_1_bmp=mode_1_bmp,
            prepend=prepend,
            append=append,
            drop_upper_bound=upper_bound)
        x.set_overhead_limit(overhead)
        x.encode_to_packets()
        if save_as_fasta and asdna:
            x.save_packets_fasta(file_ending="_RU10", seed_is_filename=True)
        elif save_as_zip:
            x.save_packets_zip(save_as_dna=asdna,
                               file_ending="_RU10",
                               seed_is_filename=True)
        else:
            x.save_packets(True,
                           save_as_dna=asdna,
                           seed_is_filename=True,
                           clear_output=True)

        return x
Exemple #8
0
def fix_(in_file, out_file_str, seq_len=164):
    """Filter sequence lines by DNA rules, write them as FASTA and de-duplicate.

    Pass 1: every second line of *in_file* (the sequence lines) of exactly
    *seq_len* characters with a rule error probability < 1.0 is written to
    *out_file_str* with its error probability as the FASTA header.
    Pass 2: the file is rewritten with duplicate IDs removed (find_dup_ids).

    :param in_file: input file with alternating header/sequence lines.
    :param out_file_str: path of the FASTA file to (re)write.
    :param seq_len: required sequence length (was hard-coded to 164).
    """
    correct = []
    rule = FastDNARules()
    with open(in_file, "r") as inf:
        lines = inf.readlines()
    for line in lines[1::2]:
        line = line.strip()
        if len(line) != seq_len:
            continue  # skip truncated / oversized sequences
        err_prob = rule.apply_all_rules(line)
        if err_prob < 1.0:
            correct.append((line, err_prob))
    with open(out_file_str, "w") as out_file:
        for line, err_prob in correct:
            # was f">%s\n" % err_prob — a %-format inside an f-string literal
            out_file.write(f">{err_prob}\n")
            out_file.write(line.strip().replace("\n", "") + "\n")
    cleaned = find_dup_ids(out_file_str)
    with open(out_file_str, "w") as out_file:
        for line in cleaned:
            out_file.write(">abc\n")
            out_file.write(line.strip().replace("\n", "") + "\n")
Exemple #9
0
def encode(file, dist_lst, asdna=True, chunk_size=50):
    """
    RU10-encode *file* with a custom degree distribution and record the error
    probability of every generated packet, bucketed by packet degree. A
    pseudo decoder runs alongside to detect when the file becomes decodable.

    :param file: path of the file to encode.
    :param dist_lst: distribution values assigned to RaptorDistribution.f.
    :param asdna: if True, the encoder applies the DNA rules.
    :param chunk_size: chunk size used to split the file.
    :return: (dict degree -> list of error probabilities,
              (packets needed until decodable - #chunks) / 100.0)
    """
    packets_needed = 0
    packets = dict()
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    dist.f = dist_lst
    # degrees 0..40 (inclusive) used by the custom distribution
    d = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
         29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
    dist.d = d
    dna_rules = FastDNARules()
    if asdna:
        rules = dna_rules
    else:
        rules = None
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=False, rules=rules,
                    error_correction=nocode, id_len_format="H", number_of_chunks_len_format="B",
                    save_number_of_chunks_in_packet=False, mode_1_bmp=False)
    x.prepare()
    y = RU10Decoder.pseudo_decoder(x.number_of_chunks, False)
    if y.distribution is None:  # self.isPseudo and
        # mirror the encoder's custom distribution inside the pseudo decoder
        y.distribution = RaptorDistribution(x.number_of_chunks)
        y.distribution.f = dist_lst
        y.distribution.d = d
        y.number_of_chunks = x.number_of_chunks
        _, y.s, y.h = intermediate_symbols(x.number_of_chunks, y.distribution)
        y.createAuxBlocks()
    n = 0
    # pre-create buckets for degrees 0..44
    for p_tmp in range(45):
        packets[p_tmp] = list()
    # always generate 50 * #chunks packets; decodability only records packets_needed
    while n < number_of_chunks * 50:
        pack = x.create_new_packet()
        if packets_needed == 0:
            # feed the pseudo decoder only until it first decodes
            y.input_new_packet(pack)
        # NOTE(review): dna_rules is applied here even when asdna is False — confirm intended
        should_drop_packet(dna_rules, pack)
        if pack.get_degree() not in packets:
            packets[pack.get_degree()] = list()
        packets[pack.get_degree()].append(pack.error_prob)
        n += 1
        if n >= number_of_chunks and y.is_decoded() and packets_needed == 0:
            packets_needed = n
            # we dont want to break, we want to generate #chunks * XXX packets!
            # break
    print("Packets created: " + str(sum([len(x) for x in packets.values()])))
    # NOTE(review): if the pseudo decoder never decodes, packets_needed stays 0
    # and the returned overhead is negative — confirm callers handle that.
    return packets, (packets_needed - number_of_chunks) / 100.0
Exemple #10
0
def create_packets_e_prob(start_num: int, normed_dist: ndarray, number_of_packets: int, rules=None):
    """Create RU10 packets with sequential seeds and return their error probabilities.

    :param start_num: first seed; packets use seeds start_num .. start_num + number_of_packets - 1.
    :param normed_dist: normalized distribution values assigned to RaptorDistribution.f.
    :param number_of_packets: how many packets to generate.
    :param rules: DNA rules to apply; a fresh FastDNARules() if None.
    :return: list of per-packet error probabilities.
    """
    dist_obj = RaptorDistribution(__NUM_CHUNKS)
    dist_obj.f = normed_dist
    dist_obj.d = list(range(41))
    enc = RU10Encoder(file=__FILE, number_of_chunks=__NUM_CHUNKS, distribution=dist_obj, insert_header=False)
    enc.prepare()
    if rules is None:
        rules = FastDNARules()
    error_probs = []
    for seed in range(start_num, start_num + number_of_packets):
        pkt = enc.create_new_packet(seed=seed)
        should_drop_packet(rules, pkt)
        error_probs.append(pkt.error_prob)
        del pkt
    del enc
    return error_probs
Exemple #11
0
def main(in_file: str,
         num_chunks=0,
         chunk_size=0,
         as_dna=True,
         err_correction: typing.Callable[[typing.Any], typing.Any] = nocode,
         insert_header=False,
         save_number_of_chunks_in_packet=False,
         mode_1_bmp=False):
    """RU10-encode *in_file*, save the packets and write a config file.

    Exactly one of chunk_size / num_chunks must be non-zero; a non-zero
    chunk_size takes precedence and recomputes num_chunks.

    :param in_file: path of the file to encode.
    :param num_chunks: number of chunks (ignored if chunk_size != 0).
    :param chunk_size: chunk size; if non-zero, overrides num_chunks.
    :param as_dna: if True, apply DNA rules and save packets as DNA.
    :param err_correction: error-correction wrapper applied to each packet.
    :param insert_header: if True, insert a header chunk.
    :param save_number_of_chunks_in_packet: store #chunks in every packet.
    :param mode_1_bmp: BMP mode-1 flag passed to the encoder.
    """
    if chunk_size != 0:
        num_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
            in_file, chunk_size)
    elif num_chunks == 0:
        print("Aborting. Please set either chunk_size or number_of_chunks!")
        return
    # CONSISTENCY FIX: the num_chunks branch previously called
    # RaptorDistribution.RaptorDistribution(...) while every other call site
    # in this file instantiates the class directly.
    dist = RaptorDistribution(num_chunks)
    rules = FastDNARules() if as_dna else None
    x = RU10Encoder(
        in_file,
        num_chunks,
        dist,
        chunk_size=chunk_size,
        insert_header=insert_header,
        rules=rules,
        error_correction=err_correction,
        id_len_format="H",
        number_of_chunks_len_format="B",
        save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
        mode_1_bmp=mode_1_bmp)
    x.encode_to_packets()
    x.save_packets(True, save_as_dna=as_dna, seed_is_filename=False)
    conf = {
        # BUGFIX: `e_correction` was an undefined name; the parameter is err_correction.
        'error_correction': err_correction,
        # NOTE(review): norepair_symbols is not defined in this scope —
        # presumably a module-level global; confirm.
        'repair_symbols': norepair_symbols,
        'asdna': as_dna,
        'number_of_splits': 0,
        'find_minimum_mode': True,
        'seq_seed': False
    }
    x.save_config_file(conf, section_name="RU10_" + in_file)
Exemple #12
0
    def encode(self, file, asdna=True, error_correction=nocode, insert_header=False,
               save_number_of_chunks_in_packet=False, mode_1_bmp=False, chunk_size=50):
        """RU10-encode *file* with this instance's distribution (self.X / self.d)
        and record every packet's error probability, bucketed by degree. A
        pseudo decoder runs alongside to detect when the file becomes decodable.

        :param file: path of the file to encode.
        :param asdna: if True, the encoder applies the DNA rules.
        :param error_correction: error-correction wrapper applied to each packet.
        :param insert_header: if True, insert a header chunk.
        :param save_number_of_chunks_in_packet: store #chunks in every packet.
        :param mode_1_bmp: BMP mode-1 flag passed to the encoder.
        :param chunk_size: chunk size used to split the file.
        :return: (dict degree -> list of error probabilities,
                  (packets needed until decodable - #chunks) / 100.0)
        """
        packets_needed = 0
        packets = dict()
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)
        dist = RaptorDistribution(number_of_chunks)
        # instance supplies the custom distribution values and degrees
        dist.f = self.X
        dist.d = self.d
        dna_rules = FastDNARules()
        if asdna:
            rules = dna_rules
        else:
            rules = None
        x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=insert_header, rules=rules,
                        error_correction=error_correction, id_len_format="H", number_of_chunks_len_format="B",
                        save_number_of_chunks_in_packet=save_number_of_chunks_in_packet, mode_1_bmp=mode_1_bmp)
        x.prepare()
        y = RU10Decoder.pseudo_decoder(x.number_of_chunks, False)
        if y.distribution is None:  # self.isPseudo and
            # mirror the encoder's custom distribution inside the pseudo decoder
            y.distribution = RaptorDistribution(x.number_of_chunks)
            y.distribution.f = self.X
            y.distribution.d = self.d
            y.number_of_chunks = x.number_of_chunks
            _, y.s, y.h = intermediate_symbols(x.number_of_chunks, y.distribution)
            y.createAuxBlocks()
        n = 0
        # pre-create buckets for degrees 0..44
        for p_tmp in range(45):
            packets[p_tmp] = list()
        # always generate 50 * #chunks packets; decodability only records packets_needed
        while n < number_of_chunks * 50:
            pack = x.create_new_packet()
            if packets_needed == 0:
                # feed the pseudo decoder only until it first decodes
                y.input_new_packet(pack)
            # NOTE(review): dna_rules is applied even when asdna is False — confirm intended
            should_drop_packet(dna_rules, pack)

            if pack.get_degree() not in packets:
                packets[pack.get_degree()] = list()
            packets[pack.get_degree()].append(pack.error_prob)
            n += 1
            if n >= number_of_chunks and y.is_decoded() and packets_needed == 0:
                packets_needed = n
                # we dont want to break, we want to generate #chunks * XXX packets!
                # break
        print("Packets created: " + str(sum([len(x) for x in packets.values()])))
        return packets, (packets_needed - number_of_chunks) / 100.0
Exemple #13
0
def encode(file, chunk_size, dist, as_dna=True, repeats=15):
    """
    Encodes the file to packets until the pseudo decoder was able to decode it 'repeats' times with the given chunk size
    and the distribution list.
    :param file: File to encode.
    :param chunk_size: Chunksize to use.
    :param dist: The distribution to calculate the average error and overhead for.
    :param as_dna: If true uses the DNA Rules.
    :param repeats: Number of En-/Decoding cycles.
    :return: (average overhead over all cycles, dict degree -> error probabilities)
    """
    degrees = {}
    overheads = []
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size, insert_header=False)
    distribution = RaptorDistribution(number_of_chunks)
    distribution.f = dist
    distribution.d = list(range(41))
    rules = FastDNARules() if as_dna else None
    encoder = RU10Encoder(file, number_of_chunks, distribution, insert_header=False, rules=rules,
                          error_correction=nocode, id_len_format="H", number_of_chunks_len_format="B",
                          save_number_of_chunks_in_packet=False, mode_1_bmp=False)
    encoder.prepare()
    for _ in range(repeats):
        # fresh random state per cycle so every run uses a new master seed
        encoder.random_state = np.random.RandomState()
        pseudo_decoder = create_pseudo_decoder(encoder.number_of_chunks, distribution)
        needed_packets = 0
        while pseudo_decoder.GEPP is None or not pseudo_decoder.is_decoded():
            needed_packets += 1
            packet = encoder.create_new_packet()
            pseudo_decoder.input_new_packet(packet)
            should_drop_packet(rules, packet)
            degrees.setdefault(packet.get_degree(), list()).append(min(packet.error_prob, 1.0))
        overheads.append((needed_packets - encoder.number_of_chunks) / 100.0)
    return sum(overheads) / len(overheads), degrees
Exemple #14
0
def test_suite(as_dna, decoder_instance):
    """End-to-end RU10 encode/decode round-trip test.

    Encodes the module-level *file*, saves the packets to *out_dir*, decodes
    them again with *decoder_instance* and checks the decoded file equals
    *cmp_file*. Cleans up *out_dir* on success.

    :param as_dna: if True, apply DNA rules and save packets as DNA.
    :param decoder_instance: decoder class to use (e.g. RU10Decoder, RU10BPDecoder).
    """
    dir_path = os.getcwd()
    try:
        os.remove(dir_path + "/" + file)
    # BUGFIX: was a bare `except:`, which also swallows KeyboardInterrupt,
    # SystemExit and programming errors; only file-system errors are expected.
    except OSError:
        print("Not deleting, File did not exists")
    shutil.copyfile(dir_path + "/" + cmp_file, dir_path + "/" + file)
    print(as_dna)
    chunksize = 200
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
        file, chunksize)
    dist = RaptorDistribution(number_of_chunks)
    pseudo_decoder = decoder_instance.pseudo_decoder(
        number_of_chunks=number_of_chunks)
    rules = FastDNARules() if as_dna else None
    encoder = RU10Encoder(file,
                          number_of_chunks,
                          dist,
                          pseudo_decoder=pseudo_decoder,
                          rules=rules,
                          id_len_format="H",
                          number_of_chunks_len_format="H",
                          insert_header=True)
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    # the pseudo decoder fed during encoding must already have decoded the file
    assert (pseudo_decoder.is_decoded() and pseudo_decoder.getSolvedCount()
            == pseudo_decoder.number_of_chunks)
    assert os.path.exists(out_dir)
    decoder = decoder_instance(out_dir)
    decoder.decodeFolder(id_len_format="H", number_of_chunks_len_format="H")
    if isinstance(decoder, RU10BPDecoder):
        # BP decoding may need the packets fed explicitly
        for pack in encoder.encodedPackets:
            decoder.input_new_packet(pack)
    assert decoder.is_decoded() and decoder.getSolvedCount(
    ) == encoder.number_of_chunks
    os.remove(file)
    decoder.saveDecodedFile(print_to_output=False)
    assert os.path.exists(file) and filecmp.cmp(file, cmp_file)
    shutil.rmtree(out_dir)
Exemple #15
0
            index.append(key)
            data.append(val)

        fig, (ax) = plt.subplots(ncols=1)
        ax.boxplot(data)
        ax.set_xticklabels(index)
        plt.show()

        plt.plot(num_list)
        plt.plot(mean_list)
        plt.show()


if __name__ == "__main__":
    # Script entry point: RU10-encode the "Dorn" test file with Reed-Solomon
    # error correction and DNA rules, then hand the encoder to QualityPacketGen.
    # NOTE: these names are module-level globals and may be read elsewhere in
    # the file (e.g. `file` in test_suite) — do not rename.
    file = "../.INFILES/Dorn"
    chunk_size = 100
    norepairsymbols = 6  # number of Reed-Solomon repair symbols per packet
    save_number_of_chunks_in_packet = False
    insert_header = False
    rules = FastDNARules()
    error_correction = lambda x: reed_solomon_encode(x, norepairsymbols)
    number_of_chunks = 50  # fallback only; overwritten below since chunk_size != 0
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(file, chunk_size)

    dist = RaptorDistribution(number_of_chunks)
    x = RU10Encoder(file, number_of_chunks, dist, chunk_size=chunk_size, insert_header=insert_header, rules=rules,
                    error_correction=error_correction, id_len_format="H", number_of_chunks_len_format="B",
                    save_number_of_chunks_in_packet=save_number_of_chunks_in_packet)
    aa = QualityPacketGen(x)
Exemple #16
0
def test_homopolymers(params):
    """Both homopolymer implementations must yield the same score.

    NOTE(review): the DNARules result is passed through fast_comp with a
    second argument (3) while the FastDNARules result is not — presumably a
    precision argument; confirm against fast_comp's signature.
    """
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.homopolymers(sequence), 3)
    optimized = fast_comp(FastDNARules.homopolymers(sequence))
    assert reference == optimized == expected
Exemple #17
0
def test_c_permutation(params):
    """c_permutation must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.c_permutation(sequence))
    optimized = fast_comp(FastDNARules.c_permutation(sequence))
    assert reference == optimized == expected
Exemple #18
0
def test_random_permutations(params):
    """random_permutations must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = DNARules.random_permutations(sequence)
    optimized = FastDNARules.random_permutations(sequence)
    assert reference == optimized == expected
Exemple #19
0
def test_illegal_symbols(params):
    """illegal_symbols must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = DNARules.illegal_symbols(sequence)
    optimized = FastDNARules.illegal_symbols(sequence)
    assert reference == optimized == expected
Exemple #20
0
def test_trinucleotid_runs(params):
    """trinucleotid_runs must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.trinucleotid_runs(sequence))
    optimized = fast_comp(FastDNARules.trinucleotid_runs(sequence))
    assert reference == optimized == expected
Exemple #21
0
def test_long_strands(params):
    """DNARules and FastDNARules long_strands scores must agree within 0.01.

    BUGFIX: the closing parenthesis of abs() previously wrapped the whole
    comparison (`abs(a - b < 0.01)`), so the assertion checked the truthiness
    of a boolean instead of bounding the absolute difference (and passed for
    arbitrarily large negative differences).
    """
    assert abs(
        DNARules.long_strands(params) -
        FastDNARules.long_strands(params)) < 0.01
Exemple #22
0
def test_repeat_region(params):
    """repeatRegion must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.repeatRegion(sequence))
    optimized = fast_comp(FastDNARules.repeatRegion(sequence))
    assert reference == optimized == expected
Exemple #23
0
def test_small_repeat_region(params):
    """smallRepeatRegion must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.smallRepeatRegion(sequence))
    optimized = fast_comp(FastDNARules.smallRepeatRegion(sequence))
    assert reference == optimized == expected
Exemple #24
0
def test_gc_content(params):
    """Overall GC content must match between both implementations (2 decimals)."""
    sequence, expected = params[0], params[1]
    reference = round(DNARules.overall_gc_content(sequence), 2)
    optimized = round(FastDNARules.overall_gc_content(sequence), 2)
    assert reference == optimized == expected
Exemple #25
0
def test_simple_motif_search(params):
    """simple_motif_search must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.simple_motif_search(sequence))
    optimized = fast_comp(FastDNARules.simple_motif_search(sequence))
    assert reference == optimized == expected
Exemple #26
0
def test_motif_regex_search(params):
    """motif_regex_search must score identically in both rule implementations."""
    sequence, expected = params[0], params[1]
    reference = fast_comp(DNARules.motif_regex_search(sequence))
    optimized = fast_comp(FastDNARules.motif_regex_search(sequence))
    assert reference == optimized == expected
Exemple #27
0
def test_suite(as_dna, decoder_instance, distribution, use_header,
               implicit_mode):
    """End-to-end LT encode/decode round-trip test.

    Encodes the module-level *file* with the selected soliton distribution,
    saves the packets to *out_dir*, decodes them again and checks the result
    against *cmp_file*. For LTBPDecoder the Gauss-based LTDecoder is run on
    the same folder as well, since the approximate decoder defines an upper
    bound. Cleans up *out_dir* on success.

    :param as_dna: if True, apply DNA rules and save packets as DNA.
    :param decoder_instance: decoder class to use.
    :param distribution: "robust", "ideal" or anything else for Erlich-Zielinski.
    :param use_header: if True, insert/expect a header chunk.
    :param implicit_mode: implicit-mode flag for encoder and decoder.
    """
    dir_path = os.getcwd()
    try:
        os.remove(dir_path + "/" + file)
    # BUGFIX: was a bare `except:`, which also swallows KeyboardInterrupt,
    # SystemExit and programming errors; only file-system errors are expected.
    except OSError:
        print("Not deleting, File did not exists")
    shutil.copyfile(dir_path + "/" + cmp_file, dir_path + "/" + file)
    chunksize = 200
    number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
        file, chunksize)
    pseudo_decoder = decoder_instance.pseudo_decoder(number_of_chunks)
    if distribution == "robust":
        dist = RobustSolitonDistribution(S=number_of_chunks, delta=0.2, seed=2)
    elif distribution == "ideal":
        dist = IdealSolitonDistribution(S=number_of_chunks, seed=2)
    else:
        dist = ErlichZielinskiRobustSolitonDistribution(k=number_of_chunks,
                                                        delta=0.2,
                                                        seed=2)
    rules = FastDNARules() if as_dna else None
    encoder = LTEncoder(file,
                        number_of_chunks,
                        dist,
                        chunk_size=chunksize,
                        pseudo_decoder=pseudo_decoder,
                        rules=rules,
                        insert_header=use_header,
                        number_of_chunks_len_format="H",
                        id_len_format="H",
                        used_packets_len_format="H",
                        implicit_mode=implicit_mode)
    encoder.encode_to_packets()
    encoder.save_packets(split_to_multiple_files=True, save_as_dna=as_dna)
    # the pseudo decoder fed during encoding must already have decoded the file
    assert pseudo_decoder.is_decoded() and pseudo_decoder.getSolvedCount(
    ) == encoder.number_of_chunks
    assert os.path.exists(out_dir)
    decoder = decoder_instance(out_dir,
                               use_headerchunk=use_header,
                               dist=dist,
                               implicit_mode=implicit_mode)
    decoder.decodeFolder(number_of_chunks_len_format="H",
                         seed_len_format="H",
                         degree_len_format="H")
    assert decoder.is_decoded() and decoder.getSolvedCount(
    ) == encoder.number_of_chunks
    os.remove(file)
    decoder.saveDecodedFile(print_to_output=False)
    # without a header chunk the decoder cannot restore the original filename
    if not use_header:
        out_file = "DEC_LT_" + file
    else:
        out_file = file
    assert os.path.exists(out_file) and filecmp.cmp(out_file, cmp_file)
    if decoder_instance == LTBPDecoder:
        # since ApproxDecoder defines an upper bound Gauss-Decoder MUST be able to decode!
        decoder = LTDecoder(out_dir,
                            use_headerchunk=use_header,
                            dist=dist,
                            implicit_mode=implicit_mode)
        decoder.decodeFolder(number_of_chunks_len_format="H",
                             seed_len_format="H",
                             degree_len_format="H")
        assert (decoder.is_decoded()
                and decoder.getSolvedCount() == encoder.number_of_chunks)
        os.remove(out_file)
        decoder.saveDecodedFile(print_to_output=False)
        assert os.path.exists(out_file) and filecmp.cmp(out_file, cmp_file)
    shutil.rmtree(out_dir)
Exemple #28
0
def run(seq_seed=None,
        file='logo.jpg',
        repair_symbols=2,
        insert_header=False,
        error_correction=reed_solomon_encode,
        save_number_of_chunks_in_packet=False,
        l_size=1000,
        while_count=1000,
        chunk_size=0,
        number_of_chunks=300,
        prepend="",
        append="",
        seed_len_format=DEFAULT_ID_LEN_FORMAT,
        number_of_chunks_len_format=DEFAULT_NUMBER_OF_CHUNKS_LEN_FORMAT,
        method='RU10',
        mode1bmp=False,
        drop_above=0.4,
        packets_to_create=None):
    """Generate up to *while_count* packets with the chosen fountain code and
    keep the (at most *l_size*) packets with the lowest error probability.

    :param seq_seed: if set, packets are created with sequential seeds
        starting at this value; otherwise random seeds are used.
    :param method: one of 'RU10', 'LT' or 'Online'.
    :param l_size: maximum number of best packets to keep.
    :param drop_above: packets with error_prob above this value are discarded.
    :param packets_to_create: hard limit on sequential seeds; defaults to the
        full seed space 2 ** (8 * sizeof(seed_len_format)).
    :return: the kept packets converted to ParallelPackets.
    """
    global counter  # shared progress counter (multiprocessing.Value)
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
            file, chunk_size)
    dna_rules = FastDNARules()
    if packets_to_create is None:
        # full seed space of the chosen seed struct format
        packets_to_create = math.pow(2, 8 * struct.calcsize(seed_len_format))
    rules = dna_rules
    if repair_symbols != 0:
        dist, error_correction = get_err_dist(method, number_of_chunks,
                                              repair_symbols)
    else:
        dist = RaptorDistribution(number_of_chunks)
    if method == 'RU10':
        x = RU10Encoder(
            file,
            number_of_chunks,
            dist,
            chunk_size=chunk_size,
            insert_header=insert_header,
            rules=rules,
            error_correction=error_correction,
            id_len_format=seed_len_format,
            number_of_chunks_len_format=number_of_chunks_len_format,
            save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
            mode_1_bmp=mode1bmp,
            prepend=prepend,
            append=append)
        x.prepare()
    elif method == 'LT':
        x = LTEncoder(
            file,
            number_of_chunks,
            dist,
            chunk_size=chunk_size,
            insert_header=insert_header,
            rules=rules,
            error_correction=error_correction,
            number_of_chunks_len_format=number_of_chunks_len_format,
            id_len_format=seed_len_format,
            save_number_of_chunks_in_packet=save_number_of_chunks_in_packet)
        x.prepareEncoder()
    elif method == 'Online':
        # NOTE(review): assumes dist provides get_size() here; with
        # repair_symbols == 0 dist is a RaptorDistribution — confirm.
        number_of_chunks = dist.get_size()
        x = OnlineEncoder(
            file,
            number_of_chunks,
            dist,
            ONLINE_EPS,
            ONLINE_QUALITY,
            error_correction=error_correction,
            quality_len_format="B",
            insert_header=False,
            check_block_number_len_format=seed_len_format,
            number_of_chunks_len_format=number_of_chunks_len_format,
            rules=rules,
            save_number_of_chunks_in_packet=False)
        x.prepare()
    else:
        raise NotImplementedError("Choose: RU10, LT or Online")
    i = 0
    tmp_list = []  # kept sorted ascending by packet comparison (best first)
    while i < while_count:
        if seq_seed is not None:
            if seq_seed + i >= packets_to_create:
                break  # sequential seed space exhausted
            packet = x.create_new_packet(seed=seq_seed + i)
        else:
            packet = x.create_new_packet()
        if i == 0:
            # log length and content of the first DNA packet
            print(f"%i , %s" % (len(
                packet.get_dna_struct(True)), packet.get_dna_struct(True)))
        _ = should_drop_packet(rules, packet)
        # keep the packet only if it passes drop_above and either there is
        # room or it beats the current worst kept packet
        if packet.error_prob <= drop_above and (
                len(tmp_list) < l_size
                or packet.error_prob < tmp_list[-1].error_prob):
            if packet not in tmp_list:
                bisect.insort_left(tmp_list, packet)
            else:
                # an equal-comparing packet exists; keep the better of the two
                elem = next((x for x in tmp_list if x == packet), None)
                if packet < elem:
                    tmp_list.remove(elem)
                    del elem
                    bisect.insort_left(tmp_list, packet)
            if len(tmp_list) > l_size:
                # NOTE(review): `del ele1m` only unbinds the loop name (no-op
                # for the list); the actual trim is the slice below. The slice
                # here starts at l_size + 1, so index l_size is skipped — the
                # final `[:l_size]` makes it harmless, but confirm intent.
                for ele1m in tmp_list[l_size + 1:]:
                    del ele1m
                tmp_list = tmp_list[:l_size]

        else:
            del packet
        i += 1
        # += operation is not atomic, so we need to get a lock:
        with counter.get_lock():
            counter.value += 1
    # save_packets_fasta(tmp_list, out_file=method + "_out_partial", file_ending="." + method + "_DNA",
    #                   clear_output=False)
    # NOTE(review): conf is only consumed by the commented-out
    # save_config_file call below; _number_of_splits is not defined in this
    # scope — presumably a module global; confirm before re-enabling.
    conf = {
        'error_correction': error_correction,
        'repair_symbols': repair_symbols,
        'number_of_splits': _number_of_splits,
        'find_minimum_mode': True,
        'seq_seed': seq_seed
    }
    # x.save_config_file(conf, section_name=method + "_" + file)
    if x.progress_bar is not None:
        x.progress_bar.finish()
    return [ParallelPacket.from_packet(p) for p in tmp_list]
def run(seq_seed=None,
        file='logo.jpg',
        asdna=True,
        insert_header=False,
        error_correction=reed_solomon_encode,
        save_number_of_chunks_in_packet=False,
        l_size=1000,
        while_count=1000,
        chunk_size=0,
        number_of_chunks=300,
        prepend="",
        append="",
        seed_len_format=ID_LEN_FORMAT,
        drop_above=1.0):
    """RU10-encode *file*, create up to *while_count* packets and keep the (at
    most *l_size*) packets with the lowest error probability; finally write a
    config file and return the kept packets as ParallelPackets.

    NOTE(review): if this definition lives in the same module as the previous
    `run`, it shadows it — confirm these originate from different files.

    :param seq_seed: if set, packets use sequential seeds starting here,
        bounded by the module-level NUMBER_OF_PACKETS_TO_CREATE.
    :param asdna: if True, apply DNA rules to the packets.
    :param drop_above: packets with error_prob above this value are discarded.
    :return: the kept packets converted to ParallelPackets.
    """
    if chunk_size != 0:
        number_of_chunks = Encoder.get_number_of_chunks_for_file_with_chunk_size(
            file, chunk_size)
    dist = RaptorDistribution(number_of_chunks)
    dna_rules = FastDNARules()
    if asdna:
        rules = dna_rules
    else:
        rules = None
    x = RU10Encoder(
        file,
        number_of_chunks,
        dist,
        chunk_size=chunk_size,
        insert_header=insert_header,
        rules=rules,
        error_correction=error_correction,
        id_len_format=seed_len_format,
        number_of_chunks_len_format=NUMBER_OF_CHUNKS_LEN_FORMAT,
        save_number_of_chunks_in_packet=save_number_of_chunks_in_packet,
        prepend=prepend,
        append=append)
    x.prepare()
    i = 0
    tmp_list = []  # kept sorted ascending by packet comparison (best first)
    while i < while_count:
        if seq_seed is not None:
            if seq_seed + i >= NUMBER_OF_PACKETS_TO_CREATE:
                break  # sequential seed space exhausted
            packet = x.create_new_packet(seed=seq_seed + i)
        else:
            packet = x.create_new_packet()
        should_drop_packet(rules, packet)
        # keep the packet only if it passes drop_above and either there is
        # room or it beats the current worst kept packet
        if packet.error_prob <= drop_above and (
                len(tmp_list) < l_size
                or packet.error_prob < tmp_list[-1].error_prob):
            if packet not in tmp_list:
                bisect.insort_left(tmp_list, packet)
            else:
                # an equal-comparing packet exists; keep the better of the two
                elem = next((x for x in tmp_list if x == packet), None)
                if packet < elem:
                    tmp_list.remove(elem)
                    bisect.insort_left(tmp_list, packet)
            if len(tmp_list) > l_size:
                tmp_list = tmp_list[:l_size]
        i += 1
    print([x.error_prob for x in tmp_list])
    # NOTE(review): e_correction, _repair_symbols and _number_of_splits are
    # not defined in this scope — presumably module globals from the original
    # file; this raises NameError otherwise. Confirm.
    conf = {
        'error_correction': e_correction,
        'repair_symbols': _repair_symbols,
        'asdna': asdna,
        'number_of_splits': _number_of_splits,
        'find_minimum_mode': True,
        'seq_seed': seq_seed
    }
    x.save_config_file(conf, section_name="RU10_" + file)
    return [ParallelPacket.from_packet(p) for p in tmp_list]
Exemple #30
0
def test_windowed_gc_content(params):
    """Windowed GC content must be exactly equal between both implementations."""
    reference = DNARules.windowed_gc_content(params)
    optimized = FastDNARules.windowed_gc_content(params)
    assert abs(reference - optimized) == 0