def trim_reads(data_folder, adaID, VERBOSE=0, summary=True, quality=25,
               blocksize=10, minlen_read1=100, minlen_read2=50):
    '''Trim low-quality ends off the reads'''
    fn_in = get_read_filenames(data_folder, adaID, gzip=True)
    fn_out = get_read_filenames(data_folder, adaID, gzip=True, trimmed=True)

    n_good = 0
    n_discarded = 0

    # NOTE: the output files must be opened for writing ('wb'), and these are
    # multi-record FastQ files, so we need SeqIO.parse rather than SeqIO.read
    with gzip.open(fn_in[0], 'rb') as fin1, \
         gzip.open(fn_in[1], 'rb') as fin2, \
         gzip.open(fn_out[0], 'wb') as fout1, \
         gzip.open(fn_out[1], 'wb') as fout2:

        it1 = SeqIO.parse(fin1, 'fastq')
        it2 = SeqIO.parse(fin2, 'fastq')
        for irp, reads in enumerate(izip(it1, it2)):
            if VERBOSE >= 2:
                if not ((irp + 1) % 10000):
                    print irp + 1

            # Trim both reads
            trims = [trim_read(read, quality=quality, blocksize=blocksize)
                     for read in reads]

            # Keep the pair only if both mates pass their length cutoffs
            lrs = map(len, trims)
            if (lrs[0] > minlen_read1) and (lrs[1] > minlen_read2):
                SeqIO.write(trims[0], fout1, 'fastq')
                SeqIO.write(trims[1], fout2, 'fastq')
                n_good += 1
            else:
                n_discarded += 1

    if VERBOSE:
        print 'Trim lowq ends of reads:'
        print 'Good:', n_good
        print 'Discarded:', n_discarded

    # Write summary to file
    if summary:
        with open(get_trim_summary_filename(data_folder, adaID), 'a') as f:
            f.write('\n')
            f.write('Trim low quality ends results: adaID ' + adaID + '\n')
            f.write('Total:\t\t' + str(irp + 1) + '\n')  # irp is the last index
            f.write('Good:\t\t' + str(n_good) + '\n')
            f.write('Discarded:\t' + str(n_discarded) + '\n')
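# Usage sketch (the folder path is hypothetical; 'TS2' is the example adapter
# ID used elsewhere in this module). Survivors land in the trimmed gzipped
# FastQ files returned by get_read_filenames(..., trimmed=True):
#
#     trim_reads('/path/to/miseq_run/', 'TS2', VERBOSE=1,
#                quality=25, minlen_read1=100, minlen_read2=50)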
def gunzip_demultiplexed_reads(data_folder, adaID, VERBOSE=0):
    '''Gunzip FastQ.gz demultiplexed files'''
    from hivwholeseq.sequencing.filenames import get_read_filenames
    fns = get_read_filenames(data_folder, adaID, gzip=True)
    for fn in fns:
        if not os.path.isfile(fn):
            continue
        sp.call(['gunzip', fn])
        if VERBOSE >= 2:
            print 'Gunzipped:', fn
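# Note: gunzip decompresses each file in place and removes the .gz suffix, so
# after this runs only the uncompressed names from
# get_read_filenames(data_folder, adaID, gzip=False) exist on disk; that is
# the fallback path probed by premap_stampy below.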
def premap_stampy(data_folder, adaID, VERBOSE=0, threads=1, summary=True,
                  maxreads=-1, subsrate=0.05, gapopen=40, gapextend=3):
    '''Call stampy for the actual mapping'''
    if VERBOSE:
        print 'Premapping: adaID', adaID

    if summary:
        summary_filename = get_premap_summary_filename(data_folder, adaID)

    # Stampy can handle both gzipped and uncompressed fastq inputs
    input_filenames = get_read_filenames(data_folder, adaID, gzip=True)
    if not os.path.isfile(input_filenames[0]):
        input_filenames = get_read_filenames(data_folder, adaID, gzip=False)
    if not all(map(os.path.isfile, input_filenames)):
        raise OSError('Input files for mapping not found: ' + input_filenames[0])

    # Parallelize if requested
    if threads == 1:
        call_list = [stampy_bin,
                     '--overwrite',
                     '-g', get_reference_premap_index_filename(data_folder, adaID, ext=False),
                     '-h', get_reference_premap_hash_filename(data_folder, adaID, ext=False),
                     '-o', get_premapped_filename(data_folder, adaID, type='sam'),
                     '--insertsize=450',
                     '--insertsd=100',
                     '--substitutionrate=' + str(subsrate),
                     '--gapopen=' + str(gapopen),
                     '--gapextend=' + str(gapextend)]
        if maxreads > 0:
            call_list.append('--numrecords=' + str(maxreads))
        call_list.extend(['-M'] + input_filenames)
        call_list = map(str, call_list)
        if VERBOSE >= 2:
            print ' '.join(call_list)
        sp.call(call_list)

        if summary:
            with open(get_premap_summary_filename(data_folder, adaID), 'a') as f:
                f.write('\nStampy premapped (single thread).\n')

        # Convert to compressed BAM
        convert_sam_to_bam(get_premapped_filename(data_folder, adaID, type='bam'))

        if summary:
            with open(summary_filename, 'a') as f:
                f.write('\nSAM file converted to compressed BAM: ' +
                        get_premapped_filename(data_folder, adaID, type='bam') + '\n')

    else:
        # Multithreading works as follows: call qsub + stampy, monitor the job
        # IDs with qstat at regular intervals, and finally merge results with pysam
        output_file_parts = [get_premapped_filename(data_folder, adaID,
                                                    type='bam', part=(j + 1))
                             for j in xrange(threads)]

        # Submit the mapping jobs
        jobs_done = np.zeros(threads, bool)
        job_IDs = np.zeros(threads, 'S30')

        import hivwholeseq
        JOBDIR = hivwholeseq.__path__[0].rstrip('/') + '/'
        JOBLOGOUT = JOBDIR + 'logout'
        JOBLOGERR = JOBDIR + 'logerr'
        cluster_time = ['23:59:59', '1:59:59']
        vmem = '8G'
        for j in xrange(threads):
            # NOTE: SGE job names must not contain whitespace
            call_list = ['qsub', '-cwd',
                         '-b', 'y',
                         '-S', '/bin/bash',
                         '-o', JOBLOGOUT,
                         '-e', JOBLOGERR,
                         '-N', adaID + '-p' + str(j + 1),
                         '-l', 'h_rt=' + cluster_time[threads >= 30],
                         '-l', 'h_vmem=' + vmem,
                         stampy_bin,
                         '--overwrite',
                         '-g', get_reference_premap_index_filename(data_folder, adaID, ext=False),
                         '-h', get_reference_premap_hash_filename(data_folder, adaID, ext=False),
                         '-o', get_premapped_filename(data_folder, adaID, type='sam', part=(j + 1)),
                         '--processpart=' + str(j + 1) + '/' + str(threads),
                         '--insertsize=450',
                         '--insertsd=100',
                         '--substitutionrate=' + str(subsrate),
                         '--gapopen=' + str(gapopen),
                         '--gapextend=' + str(gapextend),
                         '-M'] + input_filenames
            call_list = map(str, call_list)
            if VERBOSE >= 2:
                print ' '.join(call_list)
            job_ID = sp.check_output(call_list)
            job_ID = job_ID.split()[2]
            job_IDs[j] = job_ID

        # Monitor the jobs until they are all done
        time_wait = 10  # seconds
        while not jobs_done.all():

            # Sleep for a while
            time.sleep(time_wait)

            # Parse the output of qstat to check the status of the jobs
            qstat_output = sp.check_output(['qstat'])
            qstat_output = qstat_output.split('\n')[:-1]  # The last line is empty
            if VERBOSE >= 3:
                print qstat_output
            if len(qstat_output) < 3:
                jobs_done[:] = True
                break
            else:
                qstat_output = [line.split()[0] for line in qstat_output[2:]]

            time_wait = 10  # seconds
            for j in xrange(threads):
                if jobs_done[j]:
                    continue

                if job_IDs[j] not in qstat_output:
                    # Convert to BAM for merging
                    if VERBOSE >= 1:
                        print 'Convert premapped reads to BAM for merging: adaID ' + \
                              adaID + ', part ' + str(j + 1) + ' of ' + str(threads)
                    convert_sam_to_bam(output_file_parts[j])
                    # There is no need to wait after a conversion (it takes
                    # longer than a few seconds anyway)
                    time_wait = 0
                    jobs_done[j] = True

        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Stampy premapped (' + str(threads) + ' threads).\n')

        # Concatenate the output files
        if VERBOSE >= 1:
            print 'Concatenate premapped reads: adaID ' + adaID + '...',
        output_filename = get_premapped_filename(data_folder, adaID, type='bam',
                                                 unsorted=True)
        pysam.cat('-o', output_filename, *output_file_parts)
        if VERBOSE >= 1:
            print 'done.'
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('BAM files concatenated (unsorted).\n')

        # Sort the file by read name (to allow the pair_generator downstream)
        # NOTE: we exclude the extension and the option -f because of a bug in samtools
        if VERBOSE >= 1:
            print 'Sort premapped reads: adaID ' + adaID
        output_filename_sorted = get_premapped_filename(data_folder, adaID,
                                                        type='bam', unsorted=False)
        pysam.sort('-n', output_filename, output_filename_sorted[:-4])
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Joint BAM file sorted.\n')

        # Reheader the file without going BAM -> SAM -> BAM
        if VERBOSE >= 1:
            print 'Reheader premapped reads: adaID ' + adaID
        header_filename = get_premapped_filename(data_folder, adaID, type='sam', part=1)
        pysam.reheader(header_filename, output_filename_sorted)
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Joint BAM file reheaded.\n')

        if VERBOSE >= 1:
            print 'Remove temporary files: adaID ' + adaID
        remove_premapped_tempfiles(data_folder, adaID, VERBOSE=VERBOSE)
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Temp premapping files removed.\n')
                f.write('\n')
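# The qstat polling above can be factored into a standalone helper. This is a
# sketch (get_running_job_IDs is a hypothetical name), assuming SGE's plain
# qstat output: two header lines, then one row per job with the job ID in the
# first column. A submitted job counts as finished once its ID drops out of
# this set, at which point its SAM part gets converted to BAM for merging.
import subprocess

def get_running_job_IDs():
    '''Sketch: return the set of job IDs currently listed by qstat'''
    lines = subprocess.check_output(['qstat']).split('\n')[:-1]
    if len(lines) < 3:
        # An empty queue prints nothing, or only the header
        return set()
    return set(line.split()[0] for line in lines[2:])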
def demultiplex_reads_single_index(data_folder, data_filenames, adapters_designed,
                                   maxreads=-1, VERBOSE=0, summary=True):
    '''Demultiplex reads with single-index adapters'''

    # Get the read filenames
    datafile_read1 = data_filenames['read1']
    datafile_read2 = data_filenames['read2']
    datafile_adapter = data_filenames['adapter']

    # Open output files (compressed)
    fouts = {adaID: [gzip.open(fn, 'wb', compresslevel=9)
                     for fn in get_read_filenames(data_folder, adaID, gzip=True)]
             for adaID, _ in adapters_designed}
    fouts['unclassified'] = [gzip.open(fn, 'wb', compresslevel=9)
                             for fn in get_unclassified_reads_filenames(data_folder, gzip=True)]

    adapters_designed_inv = dict(map(reversed, adapters_designed))
    adapters_strings = map(itemgetter(1), adapters_designed)

    # Make sure the files get closed
    try:
        # Iterate over all reads (using fast iterators)
        with gzip.open(datafile_read1, 'rb') as fh1, \
             gzip.open(datafile_read2, 'rb') as fh2, \
             gzip.open(datafile_adapter, 'rb') as fha:

            if VERBOSE >= 3:
                print 'adaID'
                print '--------------------'

            adapters_found = Counter()
            for i, (read1, read2, adapter) in enumerate(izip(FGI(fh1), FGI(fh2),
                                                             SeqIO.parse(fha, 'fastq'))):
                if i == maxreads:
                    if VERBOSE:
                        print 'Maxreads reached.'
                    break

                # Print some output
                if VERBOSE and (not ((i + 1) % 10000)):
                    print i + 1

                # Keep a tally of every adapter seen, known or not
                adapter_string = str(adapter.seq)
                adapters_found[adapter_string] += 1

                # If the adapter does not match any known one,
                # throw the pair into the unclassified wastebin
                if adapter_string not in adapters_strings:
                    adaID = 'unclassified'
                else:
                    adaID = adapters_designed_inv[adapter_string]
                    if VERBOSE >= 3:
                        print adaID

                # Write sequences (append to file; manual, but fast)
                fouts[adaID][0].write("@%s\n%s\n+\n%s\n" % read1)
                fouts[adaID][1].write("@%s\n%s\n+\n%s\n" % read2)
                if adapter_string not in adapters_strings:
                    SeqIO.write(adapter, fouts['unclassified'][2], 'fastq')

    finally:
        # Close all output files
        for fout in fouts.itervalues():
            # Close both read 1 and read 2 (and the barcode file for unclassified)
            for fou in fout:
                fou.close()

    if summary:
        with open(get_demultiplex_summary_filename(data_folder), 'a') as f:
            f.write('\n')
            f.write('Total number of reads demultiplexed: ' + str(i + 1) + '\n')
            f.write('Adapters found across all reads:\n')
            for e in adapters_found.most_common():
                f.write('\t'.join(map(str, e)) + '\n')
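# Why the manual "@%s\n%s\n+\n%s\n" writes above work: Biopython's
# FastqGeneralIterator (imported as FGI) yields plain (title, seq, qual)
# string tuples rather than SeqRecord objects, so each tuple slots straight
# into the FastQ template. A sketch with a hypothetical input file:
import gzip
from Bio.SeqIO.QualityIO import FastqGeneralIterator

with gzip.open('reads_R1.fastq.gz', 'rb') as fh:  # hypothetical path
    for title, seq, qual in FastqGeneralIterator(fh):
        print "@%s\n%s\n+\n%s" % (title, seq, qual)  # reconstructed record
        break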
seq_run = args.run
VERBOSE = args.verbose
submit = args.submit
maxreads = args.maxreads
adaID = args.adaID
savefig = args.savefig

if submit:
    fork_self(seq_run, VERBOSE=VERBOSE, maxreads=maxreads, savefig=savefig)
    sys.exit()

dataset = load_sequencing_run(seq_run)
data_folder = dataset.folder
read_len = dataset.cycles // 2

reads_filenames = get_read_filenames(data_folder, adaID, gzip=True)
if not os.path.isfile(reads_filenames[0]):
    reads_filenames = get_read_filenames(data_folder, adaID, gzip=False)

title = seq_run + ', ' + adaID
quality = quality_score_along_reads(read_len, reads_filenames,
                                    randomreads=(maxreads >= 1),
                                    maxreads=maxreads, VERBOSE=VERBOSE)

plot_cuts_quality_along_reads(data_folder, adaID, quality,
                              title=title, VERBOSE=VERBOSE,
                              savefig=savefig)

#if plotfull:
#    plot_quality_along_reads(data_folder, adaID, title,
#                             quality, VERBOSE=VERBOSE,
#                             savefig=savefig)
                    help='Maximal number of reads to analyze')
parser.add_argument('--adaID', required=True,
                    help='Adapter ID to analyze (e.g. TS2)')

args = parser.parse_args()
seq_run = args.run
VERBOSE = args.verbose
maxreads = args.maxreads
adaID = args.adaID

# Specify the dataset
dataset = MiSeq_runs[seq_run]
data_folder = dataset['folder']

# Get some reads
fns = get_read_filenames(data_folder, adaID, gzip=True)
with gzip.open(fns[0], 'rb') as fh1, gzip.open(fns[1], 'rb') as fh2:
    reads_iter1 = SeqIO.parse(fh1, 'fastq')
    reads_iter2 = SeqIO.parse(fh2, 'fastq')

    read_pairs = []
    inds = 20000 + np.arange(100000)
    np.random.shuffle(inds)
    inds = np.sort(inds[:10])
    ii = 0
    for irp, reads in enumerate(izip(reads_iter1, reads_iter2)):
        if irp == inds[ii]:
            read_pairs.append(reads)
            ii += 1
            if ii == len(inds):
                break
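# Note on the sampling above: shuffling the 100000 offsets and keeping the
# first 10 draws read-pair indices uniformly without replacement from the
# window [20000, 120000), skipping the head of the file; sorting the chosen
# indices is what lets the single sequential pass over the gzipped streams
# collect all pairs and break early.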
if VERBOSE >= 3:
    print 'adaIDs', adaIDs

# Iterate over adaIDs
for adaID in adaIDs:

    outdir = os.getcwd() + '/q_control_' + adaID
    if not os.path.exists(outdir):
        try:
            os.mkdir(outdir)
        except OSError:
            print 'Cannot make directory:', outdir
            sys.exit(1)

    # Repeat both for read 1 and read 2
    datafiles = get_read_filenames(data_folder, adaID, filtered=False)
    datafiles = {'read 1': datafiles[0],
                 'read 2': datafiles[1]}

    histograms = {}

    for readname, datafile in datafiles.iteritems():

        # Result data structures
        longest_good_block = np.zeros(L + 1, int)
        first_bad_nucleotide = np.zeros(L + 1, int)
        phred_score_dis = np.zeros((50, L))

        # Read data
        with open(datafile, 'r') as f:
            seq_iter = SeqIO.parse(f, 'fastq')