-T IndelRealigner\ -R %s \ -targetIntervals %s.intervals \ -I %s \ -D %s \ -U \ -o %s" % (gatkbase, gatkref, args.infilename.replace(args.insuffix,''), args.infilename.replace(args.insuffix,'.dupsmarked.bam'), gatkdbsnp, args.infilename.replace(args.insuffix,'.processed.bam') )],shell=True) subparser=subparsers.add_parser( "exomePostProcess", help="Postprocess a single lane of exome data, including duplicate marking and quality recalibration. No indel realignment is done as BAQ is expected downstream") subparser.add_argument( "infilename", help="The input filename") subparser.add_argument( "-o","--out-suffix",dest="outsuffix",default=".processed.bam", help="The output suffix name, to be appended after stripping in-suffix from the input filename [default: .processed.bam]") subparser.add_argument( "-i","--in-suffix",dest='insuffix',default=".bam", help="The input file suffix, to be stripped before appending the outsuffix, [default: .bam]") subparser.add_argument( "-j","--javaDir",dest="javadir",default="/data/sedavis/usr/local/jars/", help="base directory for java jar files") subparser.add_argument( "-c","--configfile",dest="configfile",default="ngs.conf",
            # NOTE(review): the enclosing function, loop and `if` headers are outside
            # this view; the indentation depth used here is assumed -- confirm.
            # Emit a Picard MergeSamFiles command that merges every
            # '<Lane>_<Flowcell><suffix>' input listed in `v` into '<k>.bam'.
            print(
                "java -jar /data/sedavis/usr/local/jars/picard/MergeSamFiles.jar ASSUME_SORTED=true OUTPUT=%s %s"
                % (
                    "'" + k + ".bam'",
                    " ".join(["INPUT='" + x["Lane"] + "_" + x["Flowcell"] + args.suffix + "'" for x in v]),
                )
            )
        else:
            # Alternative branch (its condition is not visible here): emit an
            # `ln -s` command from the listed input name(s) to '<k>.bam'.
            print(
                "ln -s %s %s"
                % (" ".join(["'" + x["Lane"] + "_" + x["Flowcell"] + args.suffix + "'" for x in v]), "'" + k + ".bam'")
            )


# Register the "makeMergeCmd" sub-command and its command-line options.
subparser = subparsers.add_parser(
    "makeMergeCmd", help="Use this to merge lanes of data into a single file, based on source_name in samplesheet"
)
subparser.add_argument("samplesheet", help="The samplesheet for the project")
# The suffix is appended verbatim to '<Lane>_<Flowcell>' when building input names.
subparser.add_argument(
    "-s",
    "--suffix",
    dest="suffix",
    default=".bam",
    help="Suffix to append to the standard lane_flowcell nomenclature for the input bam files (eg., 'recal.sorted.bam' or '.bam')",
)
subparser.add_argument(
    "-t",
    "--sampleType",
    dest="sampletype",
    help="Limit the type of sample to deal with (Genomic DNA, mRNA, etc.) from the 'sample_type' column in the samplesheet",
)
            # NOTE(review): the enclosing function and loop headers are outside this
            # view; the indentation depth used here is assumed -- confirm.
            # Rewrite any '_1,2_' marker in the sequence file name to '_2_'.
            row[read]=row[read].replace('_1,2_','_2_')
            fnameremote=row[read]
            if(fnameremote=="None"):
                # NOTE(review): only logs; no skip is visible here, so the literal
                # name "None" still reaches _doRsync below -- confirm intended.
                logger.info('Empty sequence file %s found for row:\n%s' % (read,row))
            # NOTE(review): the option default is '.', which is truthy, so by
            # default '.tgz' is rewritten to '.' -- confirm this is intended.
            if(args.altsuffix):
                fnameremote=fnameremote.replace('.tgz',args.altsuffix)
            fnamelocal=fnameremote # default same name
            # `&` is applied to two booleans here; it acts like `and` without
            # short-circuiting.
            if((int(row['index_read'])>0) & (read=='r2sequence')):
                logger.info("Found a lane with indexing, so transferring the third read and renaming to read 2")
                # The remote name switches to '_3_' while the local name
                # (captured above) keeps '_2_'.
                fnameremote=fnameremote.replace('_2_','_3_')
            _doRsync(args.fromloc,args.toloc,fnameremote,fnamelocal)

# Register the 'getsequence' sub-command and its command-line options.
subparser = subparsers.add_parser(
    'getsequence',
    help="Given a sample sheet in standard form, rsync the files from one location to another")
subparser.add_argument(
    "-f","--from-location",required=True,dest='fromloc',
    help="A uri (without filename) as recognized by rsync, such as [email protected]:/path/to/sequencefiles")
subparser.add_argument(
    "-t","--to-location",dest="toloc",default='.',
    help="A uri (without filename) as recognized by rsync, such as [email protected]:/path/to/sequencefiles or '.' [default '.']")
subparser.add_argument(
    "-a","--alternative-suffix",dest="altsuffix",default='.',
    help="The sequence files have a suffix '.tgz' that could be replaced with another suffix, for example '.fq.gz' to get the fastq files. Specify the alternate suffix here.")
subparser.add_argument(
    "samplesheet",
    help="The name of the samplesheet file from which to pull the sequence names")
# Dispatch hook: stores `func` as the parsed namespace's `func` attribute.
subparser.set_defaults(func=func)
r2 = row['r2sequence'].replace('.tgz','.fq.gz') r2 = r2.replace('_1,2_','_2_') if(r2=='None'): print("ngtools doAlign -1 %s --rg_id '%s' --rg_lb '%s' --rg_sm '%s' %d %s %s %s" % (r1,row['Lane']+"_"+row['Flowcell'], row['library_name'],row['source_name'], 8,'/data/sedavis/public/novoalign/hg18.index', '/data/sedavis/public/sequence/ucsc/hg18/genome.fa.fai', row['Lane']+"_"+row['Flowcell'])) else: print("ngtools doAlign -1 %s -2 %s --rg_id '%s' --rg_lb '%s' --rg_sm '%s' %d %s %s %s" % (r1,r2,row['Lane']+"_"+row['Flowcell'], row['library_name'],row['source_name'], 8,'/data/sedavis/public/novoalign/hg18.index', '/data/sedavis/public/sequence/ucsc/hg18/genome.fa.fai', row['Lane']+"_"+row['Flowcell'])) subparser=subparsers.add_parser( "makeAlignCmd", help="generate a .cmd file for swarm submission. Output is simply to stdout.") subparser.add_argument( "samplesheet") subparser.add_argument( 'faifile') subparser.add_argument( 'novoindex') subparser.set_defaults(func=func)
from ngs.main import subparsers
import ngs.solexadb.model


def func(args):
    """Print the sample sheet for one study to stdout as tab-separated text.

    Reads ``args.uri`` (database URI) and ``args.study_id`` (integer study
    id), queries the sample sheet through ``SolexaDB``, and writes one header
    line built from ``res['keys']`` followed by one line per entry of
    ``res['rows']``.  Every value is converted with ``str`` before joining.
    """
    sdb = ngs.solexadb.model.SolexaDB(args.uri)
    res = sdb.getSampleSheetByStudyID(args.study_id)
    # Single-argument print(...) behaves identically on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print("\t".join(str(x) for x in res['keys']))
    for row in res['rows']:
        print("\t".join(str(x) for x in row))


# Register the 'makeSampleSheet' sub-command and its command-line options.
subparser = subparsers.add_parser(
    'makeSampleSheet',
    help="Generate a standard sample sheet for a given study id")
subparser.add_argument(
    "-u", "--uri", dest='uri',
    help="Database uri, like 'mysql://*****:*****@localhost/solexa'")
subparser.add_argument(
    "-s", "--study-id", dest='study_id', type=int,
    help="Study ID from the solexa database")
# Dispatch hook: stores `func` as the parsed namespace's `func` attribute.
subparser.set_defaults(func=func)
if(args.rg_id is not None): samtag+="\tID:%s" % (args.rg_id) if(args.rg_sm is not None): samtag+="\tSM:%s" % (args.rg_id) if(args.rg_lb is not None): samtag+="\tLB:%s" % (args.rg_id) if(args.read2 is None): x=subprocess.call(["novoalign -k -a -H -o SAM '%s' -c %d -d %s -f %s | samtools import %s - - | samtools sort - %s" % (samtag,args.nproc, args.novoidx, args.read1, args.faifile, args.prefix + ".tmp")],shell=True) # paired end else: x=subprocess.call(["novoalign -k -a -H -o SAM '%s' -c %d -d %s -f %s %s | samtools import %s - - | samtools sort - %s" % (samtag,args.nproc, args.novoidx, args.read1, args.read2, args.faifile, args.prefix + ".tmp")],shell=True) if(x==0): os.rename(args.prefix+".tmp"+".bam",args.prefix+".bam") subparser = subparsers.add_parser( 'doAlign', help="do the alignment given a set of inputs") subparser.add_argument( "nproc",type=int,default=1, help="The number of cores to use") subparser.add_argument( "novoidx", help="The novoalign index to use") subparser.add_argument( "faifile", help="The .fai file to use") subparser.add_argument( '-1',"--read1", help="read1 filename") subparser.add_argument( '-2',"--read2",