Example #1
0
        # Merge all input read files into a single combined FASTQ (one per
        # mate when the run is paired-end), so that `reads` is left holding
        # exactly one entry afterwards.
        if run_is_paired_end:
            # Aggregate read1s and read2s
            # Each paired-end entry is a 'read1:read2' string; split it into
            # its two mates and collect each mate in its own list.
            read1s, read2s = [], []
            for read in reads:
                read1, read2 = read.split(':')
                read1s.append(read1)
                read2s.append(read2)

            # Combine reads groups
            # One combined output file is produced per mate, named
            # '<lib_prefix>.combined.<read1|read2>.fastq.gz' inside output_dir.
            combined_reads = []
            for name, reads_group in [('read1', read1s), ('read2', read2s)]:
                combined_read_filename = os.path.join(output_dir, '{}.combined.{}.fastq.gz'.format(lib_prefix, name))
                combined_reads.append(combined_read_filename)
                # NOTE(review): staging_delete presumably lists intermediate
                # files to remove later - confirm where it is consumed.
                staging_delete.append(combined_read_filename)
                # All files of this mate are passed as arguments and the
                # '1>' redirect sends the output to the combined file.
                # NOTE(review): `cat` presumably wraps the shell `cat`
                # command - confirm at its definition.
                cat.run(
                    Parameter(*[read for read in reads_group]),
                    Redirect(type='1>', dest=combined_read_filename)
                )

            # Update reads list
            # Re-encode the two combined files as a single 'read1:read2' entry.
            reads = [':'.join(combined_reads)]
        else:
            # Combine reads
            # Single-end: every input goes into one
            # '<lib_prefix>.combined.fastq.gz' file inside output_dir.
            combined_read_filename = os.path.join(output_dir, '{}.combined.fastq.gz'.format(lib_prefix))
            staging_delete.append(combined_read_filename)
            cat.run(
                Parameter(*[read for read in reads]),
                Redirect(type='1>', dest=combined_read_filename)
            )

            # Update reads list
            # The combined file becomes the only entry in `reads`.
            reads = [combined_read_filename]
Example #2
0
        for i, read in enumerate(reads):
            if run_is_paired_end:
                # Get paired-end reads, construct new filenames
                read1, read2 = read.split(':')
                trimmed_read1_filename = os.path.join(output_dir,
                                                      lib_prefix + '_{}_read1.trimmed.fastq.gz'.format(i))
                trimmed_read2_filename = os.path.join(output_dir,
                                                      lib_prefix + '_{}_read2.trimmed.fastq.gz'.format(i))

                # Run cutadapt
                cutadapt.run(
                    Parameter('--quality-base={}'.format(config['cutadapt']['quality-base'])),
                    Parameter('--minimum-length=5'),
                    Parameter('--output={}'.format(trimmed_read1_filename)),
                    Parameter('--paired-output={}'.format(trimmed_read2_filename)),
                    Parameter('-a', forward_adapter if forward_adapter else 'ZZZ'),
                    Parameter('-A', reverse_adapter if reverse_adapter else 'ZZZ'),
                    Parameter('-q', '30'),
                    Parameter(read1),
                    Parameter(read2),
                    Redirect(type='1>', dest=os.path.join(logs_dir, 'cutadapt.chicago.summary'))
                )

                # Update reads list
                reads[i] = ':'.join([trimmed_read1_filename, trimmed_read2_filename])
            else:
                # Construct new filename
                trimmed_read_filename = os.path.join(output_dir,
                                                     lib_prefix + '_{}.trimmed.fastq.gz'.format(i))

                # Run cutadapt
                cutadapt.run(
Example #3
0
            # Aggregate read1s and read2s
            read1s, read2s = [], []
            for read in reads:
                read1, read2 = read.split(':')
                read1s.append(read1)
                read2s.append(read2)

            # Combine reads groups
            combined_reads = []
            for name, reads_group in [('read1', read1s), ('read2', read2s)]:
                combined_read_filename = os.path.join(
                    output_dir,
                    '{}.combined.{}.fastq.gz'.format(lib_prefix, name))
                combined_reads.append(combined_read_filename)
                staging_delete.append(combined_read_filename)
                cat.run(Parameter(*[read for read in reads_group]),
                        Redirect(type='1>', dest=combined_read_filename))

            # Update reads list
            reads = [':'.join(combined_reads)]
        else:
            # Combine reads
            combined_read_filename = os.path.join(
                output_dir, '{}.combined.fastq.gz'.format(lib_prefix))
            staging_delete.append(combined_read_filename)
            cat.run(Parameter(*[read for read in reads]),
                    Redirect(type='1>', dest=combined_read_filename))

            # Update reads list
            reads = [combined_read_filename]

    # Trim adapters with cutadapt
Example #4
0
__author__ = 'Dominic Fitzgerald'
import sys
from dive.components import Software, Parameter, Redirect

# Path of the sample to process, taken from the first command-line argument
sample = sys.argv[1]

# Create the software handle: Software(software_name, software_path)
picard = Software('picard', '/path/to/java /path/to/picard.jar')

# Build the pieces of the command line up front.
# A Parameter takes its arguments as separate strings, which end up
# separated by spaces: Parameter('arguments', 'separated', 'by', 'spaces')
input_param = Parameter('I=' + sample)
output_param = Parameter('O=/path/to/output')
tool_param = Parameter('-T', 'SplitNCigarReads')

# Order matters when running, so the Redirect generally goes last
stdout_redirect = Redirect(type='>', dest='out.txt')

# Run the software with as many Parameter and Redirect objects as needed
picard.run(input_param, output_param, tool_param, stdout_redirect)

# The call above will produce and execute:
# /path/to/java /path/to/picard.jar I=/path/to/input.bam O=/path/to/output -T SplitNCigarReads > out.txt
Example #5
0
        # Merge all input read files into a single combined FASTQ (one per
        # mate when the run is paired-end), so that `reads` is left holding
        # exactly one entry afterwards.
        if run_is_paired_end:
            # Aggregate read1s and read2s
            # Each paired-end entry is a 'read1:read2' string; split it into
            # its two mates and collect each mate in its own list.
            read1s, read2s = [], []
            for reads_set in reads:
                read1, read2 = reads_set.split(':')
                read1s.append(read1)
                read2s.append(read2)

            # Combine reads groups
            # One combined output file is produced per mate, named
            # '<lib_prefix>.combined.<read1|read2>.fastq.gz' inside output_dir.
            combined_reads = []
            for name, reads_group in [('read1', read1s), ('read2', read2s)]:
                combined_read_filename = os.path.join(output_dir, '{}.combined.{}.fastq.gz'.format(lib_prefix, name))
                combined_reads.append(combined_read_filename)
                # NOTE(review): staging_delete presumably lists intermediate
                # files to remove later - confirm where it is consumed.
                staging_delete.append(combined_read_filename)
                # All files of this mate are passed as arguments and the
                # '1>' redirect sends the output to the combined file.
                # NOTE(review): `cat` presumably wraps the shell `cat`
                # command - confirm at its definition.
                cat.run(
                    Parameter(*[read for read in reads_group]),
                    Redirect(type='1>', dest=combined_read_filename)
                )

            # Update reads list
            # Re-encode the two combined files as a single 'read1:read2' entry.
            reads = [':'.join(combined_reads)]
        else:
            # Combine reads
            # Single-end: every input goes into one
            # '<lib_prefix>.combined.fastq.gz' file inside output_dir.
            combined_read_filename = os.path.join(output_dir, '{}.combined.fastq.gz'.format(lib_prefix))
            staging_delete.append(combined_read_filename)
            cat.run(
                Parameter(*[read for read in reads]),
                Redirect(type='1>', dest=combined_read_filename)
            )

            # Update reads list
            # The combined file becomes the only entry in `reads`.
            reads = [combined_read_filename]
Example #6
0
__author__ = 'Dominic Fitzgerald'
import sys
from dive.components import Software, Parameter, Redirect

# The sample to process is given as the first command-line argument
sample = sys.argv[1]

# Instantiate the software wrapper
# Signature: Software(software_name, software_path)
picard = Software('picard', '/path/to/java /path/to/picard.jar')

# Collect the command-line pieces in the order they should appear.
# Use as many Parameter and Redirect entries as needed; because order
# matters, the Redirect is generally placed last.
# Parameter('arguments', 'separated', 'by', 'spaces')
command_parts = (
    Parameter('I=' + sample),
    Parameter('O=/path/to/output'),
    Parameter('-T', 'SplitNCigarReads'),
    Redirect(type='>', dest='out.txt'),
)

# Run software
picard.run(*command_parts)

# Will produce and execute:
# /path/to/java /path/to/picard.jar I=/path/to/input.bam O=/path/to/output -T SplitNCigarReads > out.txt