# Concatenate all input reads files so that `reads` ends up holding exactly
# one entry: a single combined file (single-end) or one 'read1:read2' pair of
# combined files (paired-end).
if not run_is_paired_end:
    # Single-end: every entry of `reads` goes into one combined file
    combined_read_filename = os.path.join(
        output_dir,
        '{}.combined.fastq.gz'.format(lib_prefix)
    )
    # Record the intermediate file on the staging_delete list
    staging_delete.append(combined_read_filename)
    cat.run(
        Parameter(*reads),
        Redirect(type='1>', dest=combined_read_filename)
    )

    # Update reads list: only the combined file remains
    reads = [combined_read_filename]
else:
    # Paired-end: each entry has the form 'read1:read2'; collect the two
    # mates into separate lists
    read1s, read2s = [], []
    for pair in reads:
        read1, read2 = pair.split(':')
        read1s.append(read1)
        read2s.append(read2)

    # Concatenate each mate group into its own combined file
    combined_reads = []
    for name, reads_group in zip(('read1', 'read2'), (read1s, read2s)):
        combined_read_filename = os.path.join(
            output_dir,
            '{}.combined.{}.fastq.gz'.format(lib_prefix, name)
        )
        combined_reads.append(combined_read_filename)
        staging_delete.append(combined_read_filename)
        cat.run(
            Parameter(*reads_group),
            Redirect(type='1>', dest=combined_read_filename)
        )

    # Update reads list: a single 'read1:read2' entry of combined files
    reads = [':'.join(combined_reads)]
for i, read in enumerate(reads): if run_is_paired_end: # Get paired-end reads, construct new filenames read1, read2 = read.split(':') trimmed_read1_filename = os.path.join(output_dir, lib_prefix + '_{}_read1.trimmed.fastq.gz'.format(i)) trimmed_read2_filename = os.path.join(output_dir, lib_prefix + '_{}_read2.trimmed.fastq.gz'.format(i)) # Run cutadapt cutadapt.run( Parameter('--quality-base={}'.format(config['cutadapt']['quality-base'])), Parameter('--minimum-length=5'), Parameter('--output={}'.format(trimmed_read1_filename)), Parameter('--paired-output={}'.format(trimmed_read2_filename)), Parameter('-a', forward_adapter if forward_adapter else 'ZZZ'), Parameter('-A', reverse_adapter if reverse_adapter else 'ZZZ'), Parameter('-q', '30'), Parameter(read1), Parameter(read2), Redirect(type='1>', dest=os.path.join(logs_dir, 'cutadapt.chicago.summary')) ) # Update reads list reads[i] = ':'.join([trimmed_read1_filename, trimmed_read2_filename]) else: # Construct new filename trimmed_read_filename = os.path.join(output_dir, lib_prefix + '_{}.trimmed.fastq.gz'.format(i)) # Run cutadapt cutadapt.run(
# Aggregate read1s and read2s read1s, read2s = [], [] for read in reads: read1, read2 = read.split(':') read1s.append(read1) read2s.append(read2) # Combine reads groups combined_reads = [] for name, reads_group in [('read1', read1s), ('read2', read2s)]: combined_read_filename = os.path.join( output_dir, '{}.combined.{}.fastq.gz'.format(lib_prefix, name)) combined_reads.append(combined_read_filename) staging_delete.append(combined_read_filename) cat.run(Parameter(*[read for read in reads_group]), Redirect(type='1>', dest=combined_read_filename)) # Update reads list reads = [':'.join(combined_reads)] else: # Combine reads combined_read_filename = os.path.join( output_dir, '{}.combined.fastq.gz'.format(lib_prefix)) staging_delete.append(combined_read_filename) cat.run(Parameter(*[read for read in reads]), Redirect(type='1>', dest=combined_read_filename)) # Update reads list reads = [combined_read_filename] # Trim adapters with cutadapt
__author__ = 'Dominic Fitzgerald'

import sys

from dive.components import Software, Parameter, Redirect

# The sample to process is taken from the first command-line argument
sample = sys.argv[1]

# Instantiate software
# Software(software_name, software_path)
picard = Software('picard', '/path/to/java /path/to/picard.jar')

# Build the command line pieces
# Use as many Parameter and Redirect objects as needed. Order matters, so
# the Redirect should generally come last.
# Parameter('arguments', 'separated', 'by', 'spaces')
picard_args = [
    Parameter('I=' + sample),
    Parameter('O=/path/to/output'),
    Parameter('-T', 'SplitNCigarReads'),
    Redirect(type='>', dest='out.txt'),
]

# Run software
picard.run(*picard_args)

# With /path/to/input.bam given as the sample, this will produce and execute:
# /path/to/java /path/to/picard.jar I=/path/to/input.bam O=/path/to/output -T SplitNCigarReads > out.txt
# Collapse the list of input reads down to one combined entry
if run_is_paired_end:
    # Each entry is 'read1:read2'; gather the mates into per-mate lists
    read1s = []
    read2s = []
    for reads_pair in reads:
        read1, read2 = reads_pair.split(':')
        read1s.append(read1)
        read2s.append(read2)

    # Concatenate the read1 files, then the read2 files, each into its own
    # combined file
    combined_reads = []
    for name, reads_group in (('read1', read1s), ('read2', read2s)):
        combined_read_filename = os.path.join(
            output_dir, '{}.combined.{}.fastq.gz'.format(lib_prefix, name))
        combined_reads.append(combined_read_filename)
        # Record the intermediate file on the staging_delete list
        staging_delete.append(combined_read_filename)

        cat_inputs = Parameter(*reads_group)
        cat_output = Redirect(type='1>', dest=combined_read_filename)
        cat.run(cat_inputs, cat_output)

    # Update reads list with the single combined 'read1:read2' entry
    reads = [':'.join(combined_reads)]
else:
    # Single-end: concatenate every reads file into one combined file
    combined_read_filename = os.path.join(
        output_dir, '{}.combined.fastq.gz'.format(lib_prefix))
    staging_delete.append(combined_read_filename)

    cat_inputs = Parameter(*reads)
    cat_output = Redirect(type='1>', dest=combined_read_filename)
    cat.run(cat_inputs, cat_output)

    # Update reads list with the single combined file
    reads = [combined_read_filename]
__author__ = 'Dominic Fitzgerald'

import sys

from dive.components import Software, Parameter, Redirect

# First command-line argument: the sample to pass to picard as its input
sample = sys.argv[1]

# Instantiate software: Software(software_name, software_path)
picard = Software('picard', '/path/to/java /path/to/picard.jar')

# Describe the input and output arguments up front
input_arg = Parameter('I=' + sample)
output_arg = Parameter('O=/path/to/output')

# Run software
# Any number of Parameter and Redirect objects may be given. Order matters,
# so a Redirect should generally be placed last.
# Parameter('arguments', 'separated', 'by', 'spaces')
picard.run(
    input_arg,
    output_arg,
    Parameter('-T', 'SplitNCigarReads'),
    Redirect(type='>', dest='out.txt')
)

# For a sample of /path/to/input.bam, this will produce and execute:
# /path/to/java /path/to/picard.jar I=/path/to/input.bam O=/path/to/output -T SplitNCigarReads > out.txt