Esempio n. 1
0
        -T IndelRealigner\
        -R %s \
        -targetIntervals %s.intervals \
        -I %s \
        -D %s \
        -U \
        -o %s" % (gatkbase,
                  gatkref,
                  args.infilename.replace(args.insuffix,''),
                  args.infilename.replace(args.insuffix,'.dupsmarked.bam'),
                  gatkdbsnp,
                  args.infilename.replace(args.insuffix,'.processed.bam')
                  )],shell=True)
                                          
# Command-line definition of the "exomePostProcess" sub-command.
# `subparser` stays bound at module level because later statements keep
# adding arguments to it.
subparser = subparsers.add_parser(
    "exomePostProcess",
    help=("Postprocess a single lane of exome data, including duplicate marking "
          "and quality recalibration.  No indel realignment is done as BAQ is "
          "expected downstream"),
)

# Required positional: the file to post-process.
subparser.add_argument("infilename", help="The input filename")

# Output name = input name with the in-suffix swapped for the out-suffix.
subparser.add_argument(
    "-o", "--out-suffix",
    default=".processed.bam",
    dest="outsuffix",
    help=("The output suffix name, to be appended after stripping in-suffix "
          "from the input filename [default: .processed.bam]"),
)
subparser.add_argument(
    "-i", "--in-suffix",
    default=".bam",
    dest="insuffix",
    help=("The input file suffix, to be stripped before appending the "
          "outsuffix, [default: .bam]"),
)

# Location of the java jar files; the default is a site-specific path.
subparser.add_argument(
    "-j", "--javaDir",
    default="/data/sedavis/usr/local/jars/",
    dest="javadir",
    help="base directory for java jar files",
)
subparser.add_argument(
    "-c","--configfile",dest="configfile",default="ngs.conf",
Esempio n. 2
0
            print(
                "java -jar /data/sedavis/usr/local/jars/picard/MergeSamFiles.jar ASSUME_SORTED=true OUTPUT=%s %s"
                % (
                    "'" + k + ".bam'",
                    " ".join(["INPUT='" + x["Lane"] + "_" + x["Flowcell"] + args.suffix + "'" for x in v]),
                )
            )
        else:
            print(
                "ln -s %s %s"
                % (" ".join(["'" + x["Lane"] + "_" + x["Flowcell"] + args.suffix + "'" for x in v]), "'" + k + ".bam'")
            )


# Command-line definition of the "makeMergeCmd" sub-command.
subparser = subparsers.add_parser(
    "makeMergeCmd",
    help="Use this to merge lanes of data into a single file, based on source_name in samplesheet")

# Required positional: the samplesheet.
subparser.add_argument(
    "samplesheet",
    help="The samplesheet for the project")

# Suffix appended to "<Lane>_<Flowcell>" when the handler builds the input
# bam file names.
subparser.add_argument(
    "-s", "--suffix", default=".bam", dest="suffix",
    help="Suffix to append to the standard lane_flowcell nomenclature for the input bam files (eg., 'recal.sorted.bam' or '.bam')")

# Optional sample-type filter; no explicit default is given here.
subparser.add_argument(
    "-t", "--sampleType", dest="sampletype",
    help="Limit the type of sample to deal with (Genomic DNA, mRNA, etc.) from the 'sample_type' column in the samplesheet")
Esempio n. 3
0
                row[read]=row[read].replace('_1,2_','_2_')
            fnameremote=row[read]
            if(fnameremote=="None"):
                logger.info('Empty sequence file %s found for row:\n%s' % (read,row))
            if(args.altsuffix):
                fnameremote=fnameremote.replace('.tgz',args.altsuffix)
            fnamelocal=fnameremote # default same name
            if((int(row['index_read'])>0) & (read=='r2sequence')):
                logger.info("Found a lane with indexing, so transferring the third read and renaming to read 2")
                fnameremote=fnameremote.replace('_2_','_3_')
            _doRsync(args.fromloc,args.toloc,fnameremote,fnamelocal)

        

# Command-line definition of the "getsequence" sub-command, which rsyncs the
# sequence files named in a samplesheet from one location to another.
subparser = subparsers.add_parser(
    'getsequence',
    help="Given a sample sheet in standard form, rsync the files from one location to another")

# Source location (required) and destination location (defaults to the
# current directory), both given as rsync-style URIs without a file name.
subparser.add_argument(
    "-f","--from-location",required=True,dest='fromloc',
    help="A uri (without filename) as recognized by rsync, such as user@host:/path/to/sequencefiles")
subparser.add_argument(
    "-t","--to-location",dest="toloc",default='.',
    help="A uri (without filename) as recognized by rsync, such as user@host:/path/to/sequencefiles or '.' [default '.']")

# The default must be falsy: the handler replaces '.tgz' with this value
# whenever it is truthy, so a non-empty default such as '.' would rewrite
# 'name.tgz' to 'name.' even when the option is not given.
subparser.add_argument(
    "-a","--alternative-suffix",dest="altsuffix",default=None,
    help="The sequence files have a suffix '.tgz' that could be replaced with another suffix, for example '.fq.gz' to get the fastq files.  Specify the alternate suffix here.")

# Required positional: the samplesheet listing the sequence files to transfer.
subparser.add_argument(
    "samplesheet",
    help="The name of the samplesheet file from which to pull the sequence names")

# Dispatch this sub-command to the module's handler.
subparser.set_defaults(func=func)
Esempio n. 4
0
        r2 = row['r2sequence'].replace('.tgz','.fq.gz')
        r2 = r2.replace('_1,2_','_2_')
        if(r2=='None'):
            print("ngtools doAlign -1 %s --rg_id '%s' --rg_lb '%s' --rg_sm '%s' %d %s %s %s" %
                  (r1,row['Lane']+"_"+row['Flowcell'],
                   row['library_name'],row['source_name'],
                   8,'/data/sedavis/public/novoalign/hg18.index',
                   '/data/sedavis/public/sequence/ucsc/hg18/genome.fa.fai',
                   row['Lane']+"_"+row['Flowcell']))
        else:
            print("ngtools doAlign -1 %s -2 %s --rg_id '%s' --rg_lb '%s' --rg_sm '%s' %d %s %s %s" %
                  (r1,r2,row['Lane']+"_"+row['Flowcell'],
                   row['library_name'],row['source_name'],
                   8,'/data/sedavis/public/novoalign/hg18.index',
                   '/data/sedavis/public/sequence/ucsc/hg18/genome.fa.fai',
                   row['Lane']+"_"+row['Flowcell']))


# Command-line definition of the "makeAlignCmd" sub-command.
subparser = subparsers.add_parser(
    "makeAlignCmd",
    help="generate a .cmd file for swarm submission.  Output is simply to stdout.",
)

# Three required positionals, in this order.
# NOTE(review): the visible part of the handler prints hard-coded hg18 index
# and .fai paths rather than reading `faifile` / `novoindex` -- confirm
# whether these two arguments are actually used.
subparser.add_argument("samplesheet")
subparser.add_argument("faifile")
subparser.add_argument("novoindex")

# Dispatch this sub-command to the module's handler.
subparser.set_defaults(func=func)
              
        
Esempio n. 5
0
from ngs.main import subparsers
import ngs.solexadb.model

def func(args):
    """Print the sample sheet of one study to stdout as tab-separated text.

    Opens the Solexa database at ``args.uri``, fetches the sample sheet for
    ``args.study_id`` and writes one header line (the result's ``'keys'``)
    followed by one line per entry in the result's ``'rows'``.  Every value
    is converted with ``str`` and fields are joined with a tab.

    Args:
        args: Parsed command-line namespace providing ``uri`` and
            ``study_id``.
    """
    sdb = ngs.solexadb.model.SolexaDB(args.uri)
    res = sdb.getSampleSheetByStudyID(args.study_id)
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function, so this module can be
    # imported under either interpreter.
    print("\t".join(str(x) for x in res['keys']))
    for row in res['rows']:
        print("\t".join(str(x) for x in row))

# Command-line definition of the "makeSampleSheet" sub-command.
subparser = subparsers.add_parser(
    "makeSampleSheet",
    help="Generate a standard sample sheet for a given study id",
)

# Connection string for the database to query.
subparser.add_argument(
    "-u", "--uri",
    dest="uri",
    help="Database uri, like 'mysql://*****:*****@localhost/solexa'",
)

# Study to export; parsed as an integer.
subparser.add_argument(
    "-s", "--study-id",
    type=int,
    dest="study_id",
    help="Study ID from the solexa database",
)

# Dispatch this sub-command to the module's handler.
subparser.set_defaults(func=func)
Esempio n. 6
0
    if(args.rg_id is not None):
        samtag+="\tID:%s" % (args.rg_id)
    if(args.rg_sm is not None):
        samtag+="\tSM:%s" % (args.rg_id)
    if(args.rg_lb is not None):
        samtag+="\tLB:%s" % (args.rg_id)
    if(args.read2 is None):
        x=subprocess.call(["novoalign -k -a -H -o SAM '%s' -c %d -d %s -f %s | samtools import %s - - | samtools sort - %s" % (samtag,args.nproc, args.novoidx, args.read1, args.faifile, args.prefix + ".tmp")],shell=True)
    # paired end
    else:
        x=subprocess.call(["novoalign -k -a -H -o SAM '%s' -c %d -d %s -f %s %s | samtools import %s - - | samtools sort - %s" % (samtag,args.nproc, args.novoidx, args.read1, args.read2, args.faifile, args.prefix + ".tmp")],shell=True)
    if(x==0):
        os.rename(args.prefix+".tmp"+".bam",args.prefix+".bam")

# Command-line definition of the "doAlign" sub-command.
# `subparser` stays bound at module level because later statements keep
# adding arguments to it.
subparser = subparsers.add_parser("doAlign", help="do the alignment given a set of inputs")

# Positionals, in command-line order.
# NOTE(review): `nproc` is a plain positional (no nargs), so it is presumably
# always required and default=1 never applies -- confirm against the argparse
# documentation before relying on the default.
subparser.add_argument("nproc", default=1, type=int, help="The number of cores to use")
subparser.add_argument("novoidx", help="The novoalign index to use")
subparser.add_argument("faifile", help="The .fai file to use")

# First read file.
subparser.add_argument("-1", "--read1", help="read1 filename")
subparser.add_argument(
    '-2',"--read2",