Ejemplo n.º 1
0
def removeDuplicates(bamFile, outFiles):
    '''Write a copy of bamFile without the reads flagged 0x400 (duplicates).

    bamFile  -- path of the input BAM file, inserted verbatim into the command.
    outFiles -- two-item sequence: (filtered BAM path, flag file path).

    The command is only assembled here; it is handed to runJob together with
    the job label and the flag file.
    NOTE(review): runJob is defined outside this block; presumably it executes
    the command and uses the flag file as a completion marker -- confirm.
    '''
    filteredBam, doneFlag = outFiles
    # -F 0x400 excludes every read that has the 0x400 flag bit set.
    shellCmd = ' '.join(['samtools view -bh -F 0x400', bamFile, '>', filteredBam])
    runJob(shellCmd, 'extractDuplicates', doneFlag)
Ejemplo n.º 2
0
def featureCounts(bamFile, outFiles):
    '''Assemble a featureCounts command for bamFile and submit it via runJob.

    bamFile  -- path of the input BAM file (last argument of the command).
    outFiles -- two-item sequence: (counts output path, flag file path).

    The annotation passed to -a is the module-level name refTranscripts,
    which is defined outside this block.
    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    countsTable, doneFlag = outFiles
    shellCmd = ' '.join([
        'featureCounts -T 4 -b -p -a',
        refTranscripts,
        '-t exon -S -g gene_id -o',
        countsTable,
        bamFile,
    ])
    runJob(shellCmd, 'featureCounts', doneFlag)
Ejemplo n.º 3
0
def htSeq(bamFile, outFiles):
    '''Pipe bamFile through samtools view into HTSeq count and submit the job.

    bamFile  -- path of the input BAM file.
    outFiles -- two-item sequence: (counts output path, flag file path).

    The annotation argument is the module-level name refTranscripts, defined
    outside this block. The counter reads the alignments from stdin ('-') and
    its stdout is redirected to the counts output path.
    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    countsFile, doneFlag = outFiles
    # Stage 1: emit the alignments, header included, as text on stdout.
    viewStage = 'samtools view -h ' + bamFile
    # Stage 2: count from stdin against the reference annotation.
    countStage = (
        'python -m HTSeq.scripts.count  --stranded=no -m intersection-nonempty - '
        + refTranscripts + ' > ' + countsFile
    )
    shellCmd = ' | '.join([viewStage, countStage])
    runJob(shellCmd, 'htSeq', doneFlag)
Ejemplo n.º 4
0
def dexSeqCount(bamFile, outFiles):
    '''Count reads per exon for the DEXSeq package in R.

    Uses the exon-based GTF file (exonAnnotation.gtf) produced by the previous
    step. The '-s no' option is required so that the reference is treated as
    unstranded; without it the counting script fails with an error.

    bamFile  -- path of the input BAM file.
    outFiles -- two-item sequence: (counts output path, flag file path).

    The counting script path comes from the module-level name countScript,
    defined outside this block.
    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    countsFile, doneFlag = outFiles
    # Three-stage pipeline: sort -> view as text with header -> count script.
    sortStage = 'samtools sort -m 5000000000 -no ' + bamFile + ' -'
    viewStage = 'samtools view -h -'
    countStage = 'python ' + countScript + ' -s no exonAnnotation.gtf  - ' + countsFile
    shellCmd = ' | '.join([sortStage, viewStage, countStage])
    runJob(shellCmd, 'contExons', doneFlag)
Ejemplo n.º 5
0
def defuse(read1, outFiles):
    '''Assemble a deFuse command for a pair of FASTQ files and submit it.

    Notes carried over from the original author:
    * Running deFuse again makes it continue where it left off; delete all the
      files in the output directory to make it restart completely.
    * The annotate-fusions subscript crashes when the genome reference in the
      config file is given as the basename (genome); it had to be changed to
      (genome.fa) after the annotation step crashed.
    * None of the R installations had the ada package; it was installed in the
      home folder and exposed through an R environment variable:
      echo 'R_LIBS_USER=~/R/x86_64-unknown-linux-gnu-library/2.15' >  $HOME/.Renviron

    read1    -- path of the first-mate FASTQ; the second-mate path is derived
                by replacing every '_R1_' with '_R2_'.
    outFiles -- two-item sequence: (output name, flag file path). Only the
                first seven characters of the output name are used, to name
                the deFuse output directory.

    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    mateFastq = re.sub('_R1_', '_R2_', read1)
    resultName, doneFlag = outFiles
    sampleTag = resultName[:7]
    # The output directory is hard-coded under the shared project folder.
    outDir = '/vlsci/VR0238/shared/DanB_batch1/trimFastq/catFastq/' + sampleTag + '_out'
    shellCmd = ' '.join([
        '/usr/local/defuse/0.6.1-gcc/scripts/defuse.pl -c config.txt',
        '--1fastq', read1,
        '--2fastq', mateFastq,
        '-o', outDir,
        '-p 8',
    ])
    runJob(shellCmd, 'defuse', doneFlag)
Ejemplo n.º 6
0
def interSectBED(bedFile, outFiles):
    '''Assemble a pairToBed command comparing bedFile with rRnaBedFile.

    bedFile  -- path passed to pairToBed as the -a input.
    outFiles -- two-item sequence: (output path, flag file path).

    The -b input is the module-level name rRnaBedFile, defined outside this
    block. The tool's stdout is redirected to the output path.
    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    overlapFile, doneFlag = outFiles
    shellCmd = ' '.join([
        'pairToBed -c -a', bedFile,
        '-b', rRnaBedFile,
        '>', overlapFile,
    ])
    runJob(shellCmd, 'interSectBED', doneFlag)
Ejemplo n.º 7
0
def bamToBed(bamFile, outFiles):
    '''Assemble a 'bamToBed -bedpe' command for bamFile and submit it.

    bamFile  -- path of the input BAM file (the -i argument).
    outFiles -- two-item sequence: (BED output path, flag file path).

    The tool's stdout is redirected to the BED output path.
    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    bedOut, doneFlag = outFiles
    shellCmd = ' '.join(['bamToBed -bedpe -i', bamFile, '>', bedOut])
    runJob(shellCmd, 'makeBED', doneFlag)
Ejemplo n.º 8
0
def sortName(bamFile, outFiles):
    '''Assemble a 'samtools sort -no' command for bamFile and submit it.

    bamFile  -- path of the input BAM file.
    outFiles -- two-item sequence: (sorted output path, flag file path).

    The sort output is written to stdout and redirected to the sorted output
    path; -m 5000000000 is the memory setting passed to samtools.
    NOTE(review): the 'sortReadName' job label suggests the intent is a
    read-name sort -- confirm against the installed samtools version.
    NOTE(review): runJob is defined elsewhere; presumably it runs the command
    and uses the flag file as a completion marker -- confirm.
    '''
    sortedOut, doneFlag = outFiles
    shellCmd = ' '.join(['samtools sort -m 5000000000 -no', bamFile, '- >', sortedOut])
    runJob(shellCmd, 'sortReadName', doneFlag)