def run_exp(i):
    """Run one sample-and-decode trial and report whether decoding succeeded.

    Reads are sampled from ``tmpfile + '.oligos'`` into a trial-specific
    reads file, decoded into a trial-specific output file, and the decoded
    output is compared against ``file_name``.  All experiment parameters
    (``num_reads``, ``sub_prob``, ``oligo_length``, ...) are read from the
    enclosing module scope.

    Args:
        i: Trial identifier; only used to build unique temp-file names
            (``tmpfile.<i>.reads`` / ``tmpfile.<i>.decoded``).

    Returns:
        1 if the decoded file compares equal to ``file_name``, otherwise 0.

    Raises:
        Whatever ``dna_storage`` or ``filecmp.cmp`` raise; the temp files
        are removed before the exception propagates.
    """
    tmpfile_prefix = tmpfile + '.' + str(i)
    reads_path = tmpfile_prefix + '.reads'
    decoded_path = tmpfile_prefix + '.decoded'
    try:
        # sample
        dna_storage.sample_reads_indel(tmpfile + '.oligos',
                                       reads_path,
                                       num_reads,
                                       sub_prob=sub_prob,
                                       del_prob=del_prob,
                                       ins_prob=ins_prob,
                                       frac_random_reads=frac_random_reads)
        # decode
        dna_storage.decode_data(reads_path,
                                oligo_length,
                                decoded_path,
                                bin_index_len,
                                BCH_bits,
                                LDPC_alpha,
                                ldpc_code_prefix,
                                file_size,
                                0.04,  # presumably the decoder's eps -- TODO confirm
                                sync=sync,
                                sync_pos=sync_pos)
        correct = 1 if filecmp.cmp(file_name, decoded_path) else 0
    finally:
        # Clean up even when sampling/decoding fails, so a crashed trial does
        # not leave stale temp files behind.  The existence check keeps a
        # missing file from masking the original exception.
        for path in (reads_path, decoded_path):
            if os.path.exists(path):
                os.remove(path)
    return correct
    # NOTE(review): fragment -- this looks like the body of a per-file loop
    # over `i` whose header is not visible here; confirm the enclosing scope.
    #
    # Purpose: starting from num_reads_init[i], find a read count at which
    # num_trials consecutive random subsamples of read_file[i] all decode back
    # to the original input file, raising the count by 500 after any failure.

    # Half of (index bits + 6 * BCH_bits); presumably the index length in
    # bases at 2 bits per base -- TODO confirm.
    index_len = (bin_index_len[i]+6*BCH_bits[i])//2
    # Midpoint of the part of the oligo that follows the index.
    sync_pos = (oligo_len-index_len)//2+index_len
    print('sync_pos',sync_pos)
    file_size = os.path.getsize(infile_path_prefix+infile_name[i])
    print('file_size (in bits):', file_size*8)
    num_reads = num_reads_init[i]
    # load all reads into list
    with open(read_file[i]) as f:
        all_reads = f.readlines()
    # NOTE: random.sample raises ValueError once num_reads exceeds
    # len(all_reads), so that is the only way out of this loop if decoding
    # never succeeds.
    while True:
        print('num_reads:',num_reads)
        print('reading cost (bases/bit):', (num_reads*oligo_len)/(file_size*8))
        success = True
        for t in range(num_trials):
            # Draw num_reads distinct reads and write them to a temp file.
            sampled_reads = random.sample(all_reads,num_reads)
            with open(tmpfile_sampled_reads,'w') as f:
                for sampled_read in sampled_reads:
                    f.write(sampled_read)
            dna_storage.remove_barcodes_flexbar(tmpfile_sampled_reads,start_barcodes[i], end_barcodes[i], tmpfile_barcode_removed) 
            dna_storage.decode_data(tmpfile_barcode_removed,oligo_len,tmpfile_decoded,bin_index_len[i],BCH_bits[i],LDPC_alpha[i],LDPC_path_prefix+LDPC_code[i],file_size,eps=eps[i],sync=sync[i],sync_pos=sync_pos)
            # A trial succeeds only if the decoded file matches the input.
            if not filecmp.cmp(infile_path_prefix+infile_name[i],tmpfile_decoded):
                success = False
            # Remove the temp files before deciding whether to stop, so a
            # failed trial does not leave them behind.
            os.remove(tmpfile_sampled_reads)
            os.remove(tmpfile_decoded)
            os.remove(tmpfile_barcode_removed)
            if not success:
                break
        # Every trial passed at this read count: done.
        if success:
            break
        num_reads += 500
Beispiel #3
0
 # NOTE(review): fragment -- the enclosing scope and the initialisation of
 # num_it / num_correct are not visible here.
 #
 # Purpose: repeat sample-and-decode trials and estimate the decoding error
 # rate as 1 - num_correct / num_it.
 # NOTE: the loop only exits after at least 50 trials AND at least 10
 # failures, so it never terminates if decoding (almost) never fails.
 while True:
     #sample
     dna_storage.sample_reads_indel(tmpfile + '.oligos',
                                    tmpfile + '.reads',
                                    num_reads,
                                    sub_prob=sub_prob,
                                    del_prob=del_prob,
                                    ins_prob=ins_prob,
                                    frac_random_reads=frac_random_reads)
     #decode
     dna_storage.decode_data(tmpfile + '.reads',
                             oligo_length,
                             tmpfile + '.decoded',
                             bin_index_len,
                             BCH_bits,
                             LDPC_alpha,
                             ldpc_code_prefix,
                             file_size,
                             0.04,
                             sync=sync,
                             sync_pos=sync_pos)
     num_it += 1
     # A trial counts as correct when the decoded file matches the input.
     if filecmp.cmp(file_name, tmpfile + '.decoded'):
         num_correct += 1
     # Only the decoded file is removed here; the '.reads' file is left on disk.
     os.remove(tmpfile + '.decoded')
     print('num_it:', num_it)
     print('num_correct:', num_correct)
     if num_it >= 50 and num_it - num_correct >= 10:
         break
 error_rate = 1 - num_correct / num_it
 print('error rate', error_rate)
Beispiel #4
0
from params import *
import sys
sys.path.append('..')
import dna_storage
import filecmp
import os

path_to_oligo_files = '/raid/nanopore/shubham/LDPC_DNA_storage_data/oligo_files_1/'

# For every input file: encode it into oligos, check that the noiseless oligos
# decode back to the input, then write a FASTA file in which each oligo is
# wrapped in that file's start/end barcodes.
for i in range(num_files):
    index_len = (bin_index_len[i] + 6 * BCH_bits[i]) // 2
    sync_pos = (oligo_len - index_len) // 2 + index_len
    print('sync_pos', sync_pos)

    # Paths used repeatedly for this file.
    source_path = infile_path_prefix + infile_name[i]
    file_size = os.path.getsize(source_path)
    encoded_path = path_to_oligo_files + 'reads.' + str(i)

    dna_storage.encode_data(
        source_path,
        oligo_len,
        encoded_path,
        BCH_bits[i],
        LDPC_alpha[i],
        LDPC_path_prefix + LDPC_code[i],
        bin_index_len[i],
        sync=sync[i],
        sync_pos=sync_pos,
    )

    # Round-trip the freshly encoded oligos to catch encoder/decoder
    # mismatches before anything is written out.
    tmpfile_decoded = 'tmpfile_decoded'
    dna_storage.decode_data(
        encoded_path,
        oligo_len,
        tmpfile_decoded,
        bin_index_len[i],
        BCH_bits[i],
        LDPC_alpha[i],
        LDPC_path_prefix + LDPC_code[i],
        file_size,
        0.01,
        sync=sync[i],
        sync_pos=sync_pos,
    )
    assert filecmp.cmp(source_path, tmpfile_decoded)
    os.remove(tmpfile_decoded)

    fasta_path = path_to_oligo_files + 'oligos_' + str(i) + '.fa'
    with open(encoded_path) as f_reads, open(fasta_path, 'w') as f_oligos:
        # Everything in the FASTA header except the per-oligo index.
        header_stem = '>oligos_' + str(i) + '_' + start_barcodes[i] + '_' + end_barcodes[i] + '_'
        for j, line in enumerate(f_reads):
            payload = line.rstrip('\n')
            f_oligos.write(header_stem + str(j) + '\n')
            f_oligos.write(start_barcodes[i] + payload + end_barcodes[i] + '\n')

# NOTE: the files generated are slightly different due to some random padding, but that doesn't affect the decoding