sampleFastq.py single <number> <inFastq1> <outFastq1>
    sampleFastq.py pair <number> <inFastq1> <inFastq2> <outFastq1> <outFastq2>
    
'''
# Import arguments
import os
from ngs_python.fastq import fastqIO
from general_python import docopt
# Parse the command line against the usage text in the module docstring
args = docopt.docopt(__doc__, version='v1')
# The requested read count arrives as a string; convert it and reject
# anything below one
args['<number>'] = int(args['<number>'])
if args['<number>'] < 1:
    raise ValueError('Number must be positive integer')
# Create input fastq object
parseIn = fastqIO.parseFastq(
    fastq1=args['<inFastq1>'], fastq2=args['<inFastq2>'])
# The value returned by check_names() is treated as the input read count
countIn = parseIn.check_names()
# Parenthesised single-argument print behaves identically as a Python 2
# statement and a Python 3 function call, and matches the final report below
print('{} initial reads'.format(countIn))
# Cannot sample more reads than the input contains
if countIn < args['<number>']:
    raise ValueError('Number exceeds reads in FASTQ file')
# Sample FASTQ files
parseIn.sample_reads(
    number=args['<number>'], sample=countIn, outFastq1=args['<outFastq1>'],
    outFastq2=args['<outFastq2>'])
# Re-read the output files and confirm the expected number of reads was
# written
parseOut = fastqIO.parseFastq(args['<outFastq1>'], args['<outFastq2>'])
countOut = parseOut.check_names()
print('{} final reads'.format(countOut))
if countOut != args['<number>']:
    raise ValueError('Number does not equal output count')
Example #2
0
# Every output file name is the output directory string immediately followed
# by the sample name and a file-specific suffix (plain concatenation, no path
# separator is inserted)
_samplePrefix = args.outDir + args.sampleName
args.outFastq = _samplePrefix + '_trimmed.fastq.gz'
args.nameSortBam = _samplePrefix + "_nSort.bam"
args.outPairs = _samplePrefix + ".readPairs.gz"
args.outFrags = _samplePrefix + ".fragLigations.gz"

###############################################################################
## Process FASTQ files and perform alignment
###############################################################################
# Extract fastq file names; the FASTQ directory argument is a comma-separated
# list of directories
args.read1, args.read2 = fastqFind.findFastq(
    prefix=args.fastqPrefix,
    dirList=args.fastqDir.split(','),
    pair=True
)
# Only one FASTQ file per read is supported. NotImplementedError is the
# exception class; the NotImplemented constant is not callable, so calling it
# would fail with a TypeError instead of reporting the intended message.
if len(args.read1) > 1 or len(args.read2) > 1:
    raise NotImplementedError('Multiple FASTQ file input not implemented')
# Trim and merge fastq files
pf = fastqIO.parseFastq(
    fastq1=args.read1[0],
    fastq2=args.read2[0]
)
trimMetrics = pf.interleave_trim_reads(
    outFastq=args.outFastq,
    trim=args.cutSite,
    minLength=args.minLength
)
# Print trim metrics. A parenthesised single-argument print produces the same
# output as a Python 2 statement and as a Python 3 function call.
print('\nTrim Metrics:\n\t%s\n\t%s\n\t%s\n\t%s' % (
    'total: ' + str(trimMetrics['total']),
    'too short: ' + str(trimMetrics['short']),
    'read1 trim: ' + str(trimMetrics['trim1']),
    'read2 trim: ' + str(trimMetrics['trim2'])
))
# Generate align command
alignCommand = fastqAlign.bwaMemAlign(
Example #3
0
    
    sampleFastq.py single <number> <inFastq1> <outFastq1>
    sampleFastq.py pair <number> <inFastq1> <inFastq2> <outFastq1> <outFastq2>
    
'''
# Import arguments
import os
from ngs_python.fastq import fastqIO
from general_python import docopt
# Parse the command line against the usage text in the module docstring
args = docopt.docopt(__doc__, version='v1')
# The requested read count arrives as a string; convert it and reject
# anything below one
args['<number>'] = int(args['<number>'])
if args['<number>'] < 1:
    raise ValueError('Number must be positive integer')
# Create input fastq object
parseIn = fastqIO.parseFastq(fastq1=args['<inFastq1>'],
                             fastq2=args['<inFastq2>'])
# The value returned by check_names() is treated as the input read count
countIn = parseIn.check_names()
# Parenthesised single-argument print behaves identically as a Python 2
# statement and a Python 3 function call, and matches the final report below
print('{} initial reads'.format(countIn))
# Cannot sample more reads than the input contains
if countIn < args['<number>']:
    raise ValueError('Number exceeds reads in FASTQ file')
# Sample FASTQ files
parseIn.sample_reads(number=args['<number>'],
                     sample=countIn,
                     outFastq1=args['<outFastq1>'],
                     outFastq2=args['<outFastq2>'])
# Re-read the output files and confirm the expected number of reads was
# written
parseOut = fastqIO.parseFastq(args['<outFastq1>'], args['<outFastq2>'])
countOut = parseOut.check_names()
print('{} final reads'.format(countOut))
if countOut != args['<number>']:
    raise ValueError('Number does not equal output count')
'''interleavePairedFastq.py

Usage:
    
    interleavePairedFastq.py <inFastq1> <inFastq2> <outFastq>
    
'''
# Import arguments
import os
from ngs_python.fastq import fastqIO
from general_python import docopt
# Parse the command line against the usage text in the module docstring
args = docopt.docopt(__doc__, version='v1')
# Build a single parser object over the two input FASTQ files
inFastq1, inFastq2 = args['<inFastq1>'], args['<inFastq2>']
pf = fastqIO.parseFastq(fastq1=inFastq1, fastq2=inFastq2)
# Write the interleaved output file and report whatever interleave_reads()
# returns (presumably the number of reads written - confirm against fastqIO)
count = pf.interleave_reads(args['<outFastq>'])
print(count)