sampleFastq.py single <number> <inFastq1> <outFastq1> sampleFastq.py pair <number> <inFastq1> <inFastq2> <outFastq1> <outFastq2> ''' # Import arguments import os from ngs_python.fastq import fastqIO from general_python import docopt # Extract arguments args = docopt.docopt(__doc__,version = 'v1') args['<number>'] = int(args['<number>']) if args['<number>'] < 1: raise ValueError('Number must be positive integer') # Create input fastq objects objects parseIn = fastqIO.parseFastq( fastq1=args['<inFastq1>'], fastq2=args['<inFastq2>']) countIn = parseIn.check_names() print '{} initial reads'.format(countIn) if countIn < args['<number>']: raise ValueError('Number exceeds reads in FASTQ file') # Sample FASTQ files parseIn.sample_reads( number=args['<number>'], sample=countIn, outFastq1=args['<outFastq1>'], outFastq2 = args['<outFastq2>']) # Check output reads parseOut = fastqIO.parseFastq(args['<outFastq1>'], args['<outFastq2>']) countOut = parseOut.check_names() print('{} final reads'.format(countOut)) if countOut != args['<number>']: raise ValueError('Number does not equal output count')
args.outFastq = args.outDir + args.sampleName + '_trimmed.fastq.gz' args.nameSortBam = args.outDir + args.sampleName + "_nSort.bam" args.outPairs = args.outDir + args.sampleName + ".readPairs.gz" args.outFrags = args.outDir + args.sampleName + ".fragLigations.gz" ############################################################################### ## Process FASTQ files and perform alignment ############################################################################### # Extract fastq file names args.read1, args.read2 = fastqFind.findFastq(prefix = args.fastqPrefix, dirList = args.fastqDir.split(','), pair = True) if len(args.read1) > 1 or len(args.read2) > 1: raise NotImplemented('Multiple FASTQ file input not implemented') # Trim and merge fastq files pf = fastqIO.parseFastq( fastq1 = args.read1[0], fastq2 = args.read2[0] ) trimMetrics = pf.interleave_trim_reads( outFastq = args.outFastq, trim = args.cutSite, minLength = args.minLength ) # Print trim metrics print '\nTrim Metrics:\n\t%s\n\t%s\n\t%s\n\t%s' %( 'total: ' + str(trimMetrics['total']), 'too short: ' + str(trimMetrics['short']), 'read1 trim: ' + str(trimMetrics['trim1']), 'read2 trim: ' + str(trimMetrics['trim2']) ) # Generate align command alignCommand = fastqAlign.bwaMemAlign(
sampleFastq.py single <number> <inFastq1> <outFastq1> sampleFastq.py pair <number> <inFastq1> <inFastq2> <outFastq1> <outFastq2> ''' # Import arguments import os from ngs_python.fastq import fastqIO from general_python import docopt # Extract arguments args = docopt.docopt(__doc__, version='v1') args['<number>'] = int(args['<number>']) if args['<number>'] < 1: raise ValueError('Number must be positive integer') # Create input fastq objects objects parseIn = fastqIO.parseFastq(fastq1=args['<inFastq1>'], fastq2=args['<inFastq2>']) countIn = parseIn.check_names() print '{} initial reads'.format(countIn) if countIn < args['<number>']: raise ValueError('Number exceeds reads in FASTQ file') # Sample FASTQ files parseIn.sample_reads(number=args['<number>'], sample=countIn, outFastq1=args['<outFastq1>'], outFastq2=args['<outFastq2>']) # Check output reads parseOut = fastqIO.parseFastq(args['<outFastq1>'], args['<outFastq2>']) countOut = parseOut.check_names() print('{} final reads'.format(countOut)) if countOut != args['<number>']: raise ValueError('Number does not equal output count')
'''interleavePairedFastq.py Usage: interleavePairedFastq.py <inFastq1> <inFastq2> <outFastq> '''
# Script body: interleave two paired FASTQ files into a single output file.
# The program name in the usage text above now matches the script name
# (it previously read "interlevePairedFastq.py").

# Import arguments
import os
from ngs_python.fastq import fastqIO
from general_python import docopt

# Extract arguments
args = docopt.docopt(__doc__, version='v1')

# Create input fastq object from the two input files
pf = fastqIO.parseFastq(
    fastq1=args['<inFastq1>'],
    fastq2=args['<inFastq2>']
)

# Interleave fastq files and print the value returned by interleave_reads()
count = pf.interleave_reads(args['<outFastq>'])
print(count)