" REFERENCE_SEQUENCE=",
        opts.refseq,
        " WINDOW_SIZE=",
        opts.windowsize,
        " MINIMUM_GENOME_FRACTION=",
        opts.mingenofrac,
        " INPUT=",
        opts.input,
        " OUTPUT=",
        tempout,
        " TMP_DIR=",
        opts.tmp_dir,
        " CHART_OUTPUT=",
        temppdf,
        " SUMMARY_OUTPUT=",
        temptab,
        " VALIDATION_STRINGENCY=LENIENT",
    ]
    process = Popen("".join(cl), shell=True, stderr=standard_fd, stdout=standard_fd, cwd=output_folder)
    return_value = process.wait()
    standard_fd.close()

    fixPicardOutputs(
        tempout=temptab,
        output_dir=output_folder,
        log_file=opts.log_file,
        html_output=os.path.join(output_folder, opts.html_output),
        progname=progname,
        cl=cl,
    )
Beispiel #2
0
def picVal(opts=None):
    """
    Run the Picard jar named by opts.jar on the input and pass the result
    to fixPicardOutputs; optionally write a cleaned copy with cleanSam.

    Called with sam so no need to convert, unless opts.sortme is set, in
    which case a sorted temporary bam is built and validated instead.

    opts is the parsed option object.  Attributes read here: title,
    output_dir, log, ignore, sortme, datatype, input, outdir, maxjheap,
    jar, tmp_dir, maxoutput, bisulphite, refseq, dryrun, sam, bam and
    html_output.  opts.log_file is set, and opts.maxoutput is rewritten
    to '65535' when it arrives as '0'.

    Raises ValueError when opts.dryrun is not one of 'dryrun', 'sam' or
    'bam' (this used to surface as a NameError on an unbound local).
    """
    assert opts is not None, 'picVal needs an options object'
    # Map every punctuation/whitespace character to '_' so the title is
    # safe to embed in file names.
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    title = opts.title.translate(trantab)
    tempout = os.path.join(opts.output_dir, 'rgPicardValidate.out')
    opts.log_file = opts.log or os.path.join(
        opts.output_dir, 'rgPicardValidate_%s.log' % title)
    pefilename = os.path.join(opts.output_dir,
                              'rgPicardValidate_%s.errors' % title)
    sortedfile = None
    tempbam = None
    try:
        with open(opts.log_file, 'w') as stf:
            if verbose:
                print('# opts.ignore %s  opts.sortme= %s'
                      % (opts.ignore, opts.sortme))
            if opts.sortme:
                fd, sortedfile = tempfile.mkstemp(
                    suffix='rgcleansam.sorted.bam')
                # Only the path is used; close the descriptor mkstemp
                # opened so it is not leaked.
                os.close(fd)
                # NOTE(review): some pysam versions treat the second
                # argument of sort() as an output prefix - confirm that
                # sortedfile really receives the sorted data.
                if opts.datatype == 'sam':  # need to work with a bam
                    # NOTE(review): opts.outdir here vs opts.output_dir
                    # everywhere else - confirm this is intended.
                    tempbam = samToBam(opts.input, opts.outdir)
                    pysam.sort(tempbam, sortedfile)
                else:  # is already bam
                    pysam.sort(opts.input, sortedfile)
            # The pieces are joined with '' below, so every argument
            # carries its own leading space.
            cl = [
                'java -Xmx', opts.maxjheap, ' -jar ', opts.jar,
                ' O=', tempout, ' TMP_DIR=', opts.tmp_dir
            ]
            if verbose:
                print('# cl so far %s' % (cl,))
            if opts.sortme:
                cl.append(' I=%s' % sortedfile)
            else:
                cl.append(' I=%s' % opts.input)
            if opts.maxoutput == '0':
                opts.maxoutput = '65535'
            cl.append(' MAX_OUTPUT=%s' % opts.maxoutput)
            if opts.ignore[0] != 'None':  # picard error values to ignore
                cl += [' IGNORE=%s' % x for x in opts.ignore if x != 'None']
            if opts.bisulphite.lower() != 'false':
                cl.append(' IS_BISULFITE_SEQUENCED=true')
            if opts.refseq != '':
                cl.append(' R=%s' % opts.refseq)
            s1 = ' '.join(['"%s"' % x for x in cl])
            s = '## rgPicardValidate.py about to Popen:\n%s\n' % s1
            stf.write(s)
            if verbose:
                print(s)
            # NOTE(review): the command is an unquoted string run through
            # the shell, so paths containing spaces or shell
            # metacharacters will break it.
            with open(pefilename, 'w') as picerrors:
                process = Popen(''.join(cl),
                                shell=True,
                                stderr=picerrors,
                                stdout=picerrors,
                                cwd=opts.output_dir)
                process.wait()
            with open(pefilename, 'r') as errfile:
                pe = errfile.readlines()
            stf.write('## got %d rows - first few =%s\n' %
                      (len(pe), '\n'.join(pe[:5])))
            if opts.dryrun != 'dryrun':  # want to run cleansam
                if opts.dryrun == 'sam':
                    outformat = 'sam'
                    newsam = opts.sam
                elif opts.dryrun == 'bam':
                    outformat = 'bam'
                    newsam = opts.bam
                else:
                    raise ValueError(
                        "opts.dryrun must be 'dryrun', 'sam' or 'bam', "
                        "got %r" % (opts.dryrun,))
                cleanSam(insam=opts.input,
                         newsam=newsam,
                         picardErrors=pe,
                         outformat=outformat,
                         sortme=opts.sortme)
        # The log is closed before fixPicardOutputs is handed its path.
        fixPicardOutputs(tempout=tempout,
                         output_dir=opts.output_dir,
                         log_file=opts.log_file,
                         html_output=opts.html_output,
                         progname=progname,
                         cl=cl,
                         transpose=False)
    finally:
        # Remove the temporaries on failure as well as on success.
        for leftover in (sortedfile, tempbam):
            if leftover is not None and os.path.exists(leftover):
                os.unlink(leftover)
Beispiel #3
0
                    stderr=picerrors,
                    stdout=picerrors,
                    cwd=opts.output_dir)
    return_value = process.wait()
    picerrors.close()
    pe = open(pefilename, 'r').readlines()
    stf.write('## got %d rows - first few =%s\n' %
              (len(pe), '\n'.join(pe[:5])))
    if opts.dryrun <> 'dryrun':  # want to run cleansam
        if opts.dryrun == 'sam':
            outformat = 'sam'
            newsam = opts.sam
        elif opts.dryrun == 'bam':
            outformat = 'bam'
            newsam = opts.bam
        cleanSam(insam=opts.input,
                 newsam=newsam,
                 picardErrors=pe,
                 outformat=outformat,
                 sortme=opts.sortme)
    stf.close()
    fixPicardOutputs(tempout=tempout,
                     output_dir=opts.output_dir,
                     log_file=opts.log_file,
                     html_output=opts.html_output,
                     progname=progname,
                     cl=cl,
                     transpose=False)
    if opts.sortme:
        os.unlink(sortedfile)
Beispiel #4
0
    # Create output folder and save our R script in there.
    standard_fd = open(opts.log_file, 'w')
    cl = [
        'java -Xmx', opts.maxjheap, ' -jar ', opts.jar, ' REFERENCE_SEQUENCE=',
        opts.refseq, ' WINDOW_SIZE=', opts.windowsize,
        ' MINIMUM_GENOME_FRACTION=', opts.mingenofrac, ' INPUT=', opts.input,
        ' OUTPUT=', tempout, ' TMP_DIR=', opts.tmp_dir, ' CHART_OUTPUT=',
        temppdf, ' SUMMARY_OUTPUT=', temptab, ' VALIDATION_STRINGENCY=LENIENT'
    ]
    process = Popen(''.join(cl),
                    shell=True,
                    stderr=standard_fd,
                    stdout=standard_fd,
                    cwd=opts.output_dir)
    return_value = process.wait()
    cl = 'mogrify -format jpg -resize x500 %s' % (
        temppdf)  # make the jpg for fixPicardOutputs to find
    process = Popen(cl,
                    shell=True,
                    stderr=standard_fd,
                    stdout=standard_fd,
                    cwd=opts.output_dir)
    return_value = process.wait()
    standard_fd.close()
    fixPicardOutputs(tempout=temptab,
                     output_dir=opts.output_dir,
                     log_file=opts.log_file,
                     html_output=opts.html_output,
                     progname=progname,
                     cl=cl)
    else:
        bedToPicInterval(opts.target, targetf)   
    tempout = os.path.join(opts.output_dir,'rgPicardHsMetrics.out')
    opts.log_file = opts.log or os.path.join(opts.output_dir, '%s.log' % title)


    sfd = open(opts.log_file, 'w')

    cl = ['java -Xmx',opts.maxjheap,' -jar ',opts.jar,
          ' BAIT_INTERVALS=',baitf,' TARGET_INTERVALS=',
          targetf,' INPUT=',os.path.abspath(opts.input),
          ' OUTPUT=',tempout,
          ' VALIDATION_STRINGENCY=LENIENT', ' TMP_DIR=',opts.tmp_dir
          ]
    process = Popen(''.join(cl), shell=True, stderr=sfd, stdout=sfd, cwd=opts.output_dir)
    return_value = process.wait()
    sfd.close()

    fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir, 
                     log_file=opts.log_file,
                     html_output=os.path.join(opts.output_dir, opts.html_output),
                     progname=progname,
                     cl=cl)



    

    
    
        destination = os.path.join(output_folder, new_filename)
        shutil.copy(os.path.abspath(opts.refseq), destination)
        opts.refseq = destination

    standard_fd = open(opts.log_file, 'w')
    cl = [
        'java -Xmx', opts.maxjheap, ' -jar ', opts.jar, ' REFERENCE_SEQUENCE=',
        os.path.abspath(opts.refseq), ' ASSUME_SORTED=', opts.assume_sorted,
        ''.join([' ADAPTER_SEQUENCE=%s' % x
                 for x in opts.adapters]), ' IS_BISULFITE_SEQUENCED=',
        opts.bisulphite, ' MAX_INSERT_SIZE=', opts.maxinsert, ' INPUT=',
        os.path.abspath(opts.input), ' OUTPUT=', tempout,
        ' VALIDATION_STRINGENCY=LENIENT', ' TMP_DIR=', opts.tmp_dir
    ]
    process = Popen(''.join(cl),
                    shell=True,
                    stderr=standard_fd,
                    stdout=standard_fd,
                    cwd=opts.output_dir)
    return_value = process.wait()

    standard_fd.close()

    fixPicardOutputs(tempout=tempout,
                     output_dir=opts.output_dir,
                     log_file=opts.log_file,
                     html_output=os.path.join(opts.output_dir,
                                              opts.html_output),
                     progname=progname,
                     cl=cl)
    print s    
    pefilename = os.path.join(opts.output_dir, 'rgPicardValidate_%s.errors' % title)
    picerrors = open(pefilename,'w')
    process = Popen(''.join(cl), shell=True, stderr=picerrors, stdout=picerrors, cwd=opts.output_dir)
    return_value = process.wait()
    picerrors.close()
    pe = open(pefilename,'r').readlines()
    stf.write('## got %d rows - first few =%s\n' % (len(pe), '\n'.join(pe[:5])))
    if opts.dryrun <> 'dryrun': # want to run cleansam
        if opts.dryrun == 'sam':
            outformat = 'sam'
            newsam = opts.sam
        elif opts.dryrun == 'bam':
            outformat = 'bam'            
            newsam = opts.bam
        cleanSam(insam=opts.input, newsam=newsam, picardErrors=pe,outformat=outformat,sortme=opts.sortme)
    stf.close()
    fixPicardOutputs(tempout=tempout,output_dir=opts.output_dir,
      log_file=opts.log_file,html_output=opts.html_output,progname=progname,cl=cl,transpose=False)
    if opts.sortme:
        os.unlink(sortedfile)

        


    

    
    

def picVal(opts=None):
    """
    Run the Picard jar named by opts.jar on the input and pass the result
    to fixPicardOutputs; optionally write a cleaned copy with cleanSam.

    Called with sam so no need to convert, unless opts.sortme is set, in
    which case a sorted temporary bam is built and validated instead.

    opts is the parsed option object.  Attributes read here: title,
    output_dir, log, ignore, sortme, datatype, input, outdir, maxjheap,
    jar, tmp_dir, maxoutput, bisulphite, refseq, dryrun, sam, bam and
    html_output.  opts.log_file is set, and opts.maxoutput is rewritten
    to '65535' when it arrives as '0'.

    Raises ValueError when opts.dryrun is not one of 'dryrun', 'sam' or
    'bam' (this used to surface as a NameError on an unbound local).
    """
    assert opts is not None, 'picVal needs an options object'
    # Map every punctuation/whitespace character to '_' so the title is
    # safe to embed in file names.
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    title = opts.title.translate(trantab)
    tempout = os.path.join(opts.output_dir, 'rgPicardValidate.out')
    opts.log_file = opts.log or os.path.join(opts.output_dir, 'rgPicardValidate_%s.log' % title)
    pefilename = os.path.join(opts.output_dir, 'rgPicardValidate_%s.errors' % title)
    sortedfile = None
    tempbam = None
    try:
        with open(opts.log_file, 'w') as stf:
            if verbose:
                print('# opts.ignore %s  opts.sortme= %s' % (opts.ignore, opts.sortme))
            if opts.sortme:
                fd, sortedfile = tempfile.mkstemp(suffix='rgcleansam.sorted.bam')
                # Only the path is used; close the descriptor mkstemp
                # opened so it is not leaked.
                os.close(fd)
                # NOTE(review): some pysam versions treat the second
                # argument of sort() as an output prefix - confirm that
                # sortedfile really receives the sorted data.
                if opts.datatype == 'sam':  # need to work with a bam
                    # NOTE(review): opts.outdir here vs opts.output_dir
                    # everywhere else - confirm this is intended.
                    tempbam = samToBam(opts.input, opts.outdir)
                    pysam.sort(tempbam, sortedfile)
                else:  # is already bam
                    pysam.sort(opts.input, sortedfile)
            # The pieces are joined with '' below, so every argument
            # carries its own leading space.
            cl = ['java -Xmx', opts.maxjheap, ' -jar ', opts.jar, ' O=', tempout, ' TMP_DIR=', opts.tmp_dir]
            if verbose:
                print('# cl so far %s' % (cl,))
            if opts.sortme:
                cl.append(' I=%s' % sortedfile)
            else:
                cl.append(' I=%s' % opts.input)
            if opts.maxoutput == '0':
                opts.maxoutput = '65535'
            cl.append(' MAX_OUTPUT=%s' % opts.maxoutput)
            if opts.ignore[0] != 'None':  # picard error values to ignore
                cl += [' IGNORE=%s' % x for x in opts.ignore if x != 'None']
            if opts.bisulphite.lower() != 'false':
                cl.append(' IS_BISULFITE_SEQUENCED=true')
            if opts.refseq != '':
                cl.append(' R=%s' % opts.refseq)
            s1 = ' '.join(['"%s"' % x for x in cl])
            s = '## rgPicardValidate.py about to Popen:\n%s\n' % s1
            stf.write(s)
            if verbose:
                print(s)
            # NOTE(review): the command is an unquoted string run through
            # the shell, so paths containing spaces or shell
            # metacharacters will break it.
            with open(pefilename, 'w') as picerrors:
                process = Popen(''.join(cl), shell=True, stderr=picerrors, stdout=picerrors, cwd=opts.output_dir)
                process.wait()
            with open(pefilename, 'r') as errfile:
                pe = errfile.readlines()
            stf.write('## got %d rows - first few =%s\n' % (len(pe), '\n'.join(pe[:5])))
            if opts.dryrun != 'dryrun':  # want to run cleansam
                if opts.dryrun == 'sam':
                    outformat = 'sam'
                    newsam = opts.sam
                elif opts.dryrun == 'bam':
                    outformat = 'bam'
                    newsam = opts.bam
                else:
                    raise ValueError("opts.dryrun must be 'dryrun', 'sam' or 'bam', got %r" % (opts.dryrun,))
                cleanSam(insam=opts.input, newsam=newsam, picardErrors=pe, outformat=outformat, sortme=opts.sortme)
        # The log is closed before fixPicardOutputs is handed its path.
        fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir,
                         log_file=opts.log_file, html_output=opts.html_output,
                         progname=progname, cl=cl, transpose=False)
    finally:
        # Remove the temporaries on failure as well as on success.
        for leftover in (sortedfile, tempbam):
            if leftover is not None and os.path.exists(leftover):
                os.unlink(leftover)
    except:
        pass
    title = opts.namejob.translate(trantab)
    tempout = os.path.join(opts.output_dir,'rgPicardGCBiasMetrics.out')
    temppdf = os.path.join(opts.output_dir,'rgPicardGCBiasMetrics.pdf')
    temptab = os.path.join(opts.output_dir,'rgPicardGCBiasMetrics.xls')
    opts.log_file = opts.log or os.path.join(opts.output_dir, '%s.log' % title)
    # Create output folder and save our R script in there.
    standard_fd = open(opts.log_file, 'w')
    cl = ['java -Xmx',opts.maxjheap,' -jar ',opts.jar,' REFERENCE_SEQUENCE=',opts.refseq,' WINDOW_SIZE=',opts.windowsize,
    ' MINIMUM_GENOME_FRACTION=',opts.mingenofrac,' INPUT=',opts.input,' OUTPUT=',tempout,' TMP_DIR=',opts.tmp_dir,
    ' CHART_OUTPUT=',temppdf,' SUMMARY_OUTPUT=',temptab,' VALIDATION_STRINGENCY=LENIENT']
    process = Popen(''.join(cl), shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir)
    return_value = process.wait()
    cl = 'mogrify -format jpg -resize x500 %s' % (temppdf) # make the jpg for fixPicardOutputs to find
    process = Popen(cl, shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir)
    return_value = process.wait()
    standard_fd.close()
    fixPicardOutputs(tempout=temptab,output_dir=opts.output_dir,
      log_file=opts.log_file,html_output=opts.html_output,progname=progname,cl=cl)