Beispiel #1
0
def convertPs(psfile):
    """Convert a PostScript file to PDF (test utility).

    Hands the command ``ps2pdf <psfile>`` to ``runCommand``. Nothing is
    run when ``psfile`` is not an existing regular file.

    Arguments:
        -`psfile`: Path of the PostScript file to convert
    """
    # Guard clause: silently skip paths that do not point to a file.
    if not os.path.isfile(psfile):
        return
    ps2pdf_call = "ps2pdf %s" % (psfile)
    runCommand(ps2pdf_call, "T")
    return
Beispiel #2
0
def trim_Ad1_Ad2(input_fq, trimleft, trimright, output_fq):
    """Trim TEMPOseq adaptors from a fastq file with ``seqtk trimfq``.

    Both the 3' and 5' adaptors are 17 nc.

    Arguments:
        -`input_fq`: The fastq file to trim
        -`trimleft`: Value passed to the ``-b`` option of seqtk
        -`trimright`: Value passed to the ``-e`` option of seqtk
        -`output_fq`: File that receives the command's redirected output
    """
    pieces = ['seqtk trimfq']
    pieces.extend(['-b ', str(trimleft)])
    pieces.extend(['-e ', str(trimright)])
    pieces.append(input_fq)
    # The trimmed reads are captured with a `>` redirect in the command.
    pieces.extend(['> ', output_fq])
    runCommand('  '.join(pieces), True)
Beispiel #3
0
def createQuality(input, output):
    """Run ``fastqc`` on a fastq file and log its output.

    The combined stdout/stderr of the command is appended (``tee -a``) to
    ``analysis_quality.log`` inside the output folder.

    Arguments:
        -`input`: The input fastq file
        -`output`: The output folder for the analysis
    """
    fastqc_call = '  '.join(['fastqc', input, '-o', output])
    log_redirect = " 2>&1 | tee -a  " + output + "/analysis_quality.log"
    runCommand(fastqc_call + log_redirect, True)
    return
Beispiel #4
0
def split_well_barcode(wellbarcode, fastqFile, inputDir, prefixWell, suffix):
    """Split a fastq file by well barcode with ``fastx_barcode_splitter.pl``.

    The fastq file is piped into the splitter, which is called with
    ``--bol``, ``--partial 1`` and ``--mismatches 1``. The command output is
    also written to ``<prefixWell>wellsplit_barcode.log`` through ``tee``.

    Arguments:
        -`wellbarcode`: Barcode file passed to ``--bcfile``
        -`fastqFile`: The fastq file to split
        -`inputDir`: Not used by this function
        -`prefixWell`: Value for ``--prefix``; also prefixes the log file name
        -`suffix`: Value for ``--suffix``
    """
    log_path = prefixWell + "wellsplit_barcode.log"
    splitter_opts = [
        '--bcfile', wellbarcode,
        '--prefix', prefixWell,
        '--suffix', suffix,
        '--bol',
        '--partial', '1',
        '--mismatches', '1',
    ]
    feed = ['cat', fastqFile, '|', 'fastx_barcode_splitter.pl']
    pipeline = '  '.join(feed + splitter_opts)
    runCommand(pipeline + " 2>&1 | tee   " + log_path, True)
    return
Beispiel #5
0
def split_plate_barcode(barcodes,fastqFile, outDir, prefixPlate,suffix):
    """Split a fastq file by plate barcode with ``fastx_barcode_splitter.pl``.

    The fastq file is piped into the splitter, which is called with
    ``--eol``, ``--partial 1`` and ``--mismatches 1``. The command output is
    also written to ``<prefixPlate>platesplit_barcode.log`` through ``tee``.

    Arguments:
        -`barcodes`: Sequence whose first element is the plate barcode file
        -`fastqFile`: The fastq file to split
        -`outDir`: Not used by this function
        -`prefixPlate`: Value for ``--prefix``; also prefixes the log file name
        -`suffix`: Value for ``--suffix``
    """
    plate_bcfile = barcodes[0]
    log_path = prefixPlate + "platesplit_barcode.log"
    splitter_opts = [
        '--bcfile', plate_bcfile,
        '--prefix', prefixPlate,
        '--suffix', suffix,
        '--eol',
        '--partial', '1',
        '--mismatches', '1',
    ]
    feed = ['cat', fastqFile, '|', 'fastx_barcode_splitter.pl']
    pipeline = '  '.join(feed + splitter_opts)
    runCommand(pipeline + " 2>&1 | tee   " + log_path, True)
    return
Beispiel #6
0
def map_seq_to_probes(fastq, genomeDir, numCPU, outPrefix):
    """Map reads to the probes genome with STAR.

    Arguments:
        -`fastq`: Reads file passed to ``--readFilesIn`` (read through zcat)
        -`genomeDir`: STAR genome directory
        -`numCPU`: Number of threads for ``--runThreadN``
        -`outPrefix`: Value for ``--outFileNamePrefix``
    """
    # Options that depend on the call arguments.
    per_run = [
        'STAR',
        '--genomeDir', genomeDir,
        '--readFilesIn', fastq,
        '--readFilesCommand zcat',
        '--runThreadN ', str(numCPU),
        '--outFileNamePrefix', outPrefix,
    ]
    # Fixed alignment/output settings.
    fixed = [
        '--outSAMtype SAM',
        '--scoreDelOpen -10000',
        '--scoreInsOpen -10000',
        '--outFilterMismatchNmax 2',
        '--outSAMunmapped Within',
        '--outSAMattributes AS nM',
        ' --genomeLoad NoSharedMemory',
    ]
    runCommand('  '.join(per_run + fixed), True)
Beispiel #7
0
def index_db_file(input, output, cpuNum, gtfFile):
    """Index the probe fasta file with STAR so it can be used as db file.

    The genome size is estimated as (length of the second line of the
    file) * (number of lines containing ``>``), and from it the value for
    ``--genomeSAindexNbases`` is derived as
    ``min(14, log2(genomeSize) / 2 - 1)``, rounded to an integer.
    Intermediate values are printed. STAR output is appended to
    ``index_STAR_genomeFile.log`` inside the output folder.

    Arguments:
        -`input`: The probe fasta file
        -`output`: The folder that receives the STAR genome index
        -`cpuNum`: Number of threads for ``--runThreadN``
        -`gtfFile`: Annotation file passed to ``--sjdbGTFfile``
    """
    # Only the first two lines are read; the second one is taken as a
    # representative probe sequence (assumes all probes share its length).
    with open(input) as myfile:
        head = [next(myfile) for _ in range(2)]
    seq = head[1].strip()
    seqLen = len(seq)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(seqLen)
    print(input)
    cmd = [
        'grep',
        '">"',
        input,
        '| wc -l',
    ]
    cmd = '  '.join(cmd)
    # runCommand is expected to return a sequence whose first item is the
    # textual output of the command (here: the probe count).
    totalProbes = runCommand(cmd, True)
    totalProbes = totalProbes[0].strip()
    print(totalProbes)
    genomeSize = int(seqLen) * int(totalProbes)
    print(genomeSize)
    scale_factor = np.fmin(14, np.log2(genomeSize) / 2 - 1)
    scale_factor = np.round(scale_factor).astype(int)
    print(scale_factor)
    cmds = [
        'STAR',
        '--runMode genomeGenerate',
        '--genomeDir', output,
        '--genomeFastaFiles', input,
        '--sjdbGTFfile', gtfFile,
        '--sjdbGTFfeatureExon exon',
        '--runThreadN', str(cpuNum),
        '--genomeSAindexNbases', str(scale_factor),
    ]
    cmds = '  '.join(cmds)
    cmds += " 2>&1 | tee -a " + output + "/index_STAR_genomeFile.log"
    runCommand(cmds, True)
    return
Beispiel #8
0
def count_mapped(bamFile, outfile, gtfFile):
    """Count reads mapped to genome features with ``htseq-count``.

    The overlap mode is ``-m intersection-nonempty`` (``union`` is the
    other mode mentioned for this option). The input is declared to
    htseq-count as SAM (``-f sam``).

    Arguments:
        -`bamFile`: Alignment file given to htseq-count
        -`outfile`: File that receives the redirected counts
        -`gtfFile`: Annotation file with the features to count
    """
    fixed_opts = [
        'htseq-count',
        '-f sam',
        '-s no',
        '-a 10',
        '-t exon',
        '-i gene_id',
        '-m intersection-nonempty',
    ]
    io_args = [bamFile, gtfFile, '>', outfile]
    runCommand('  '.join(fixed_opts + io_args), True)
Beispiel #9
0
def sameTissueBamMerge(input, output):
    """Merge the bam files of one tissue into a single output file.

    With more than one input file, ``samtools merge`` writes the merged
    file under the base name of ``output`` in the current working
    directory, and the result is then moved into the directory part of
    ``output``. Otherwise the input is simply copied to ``output``.

    Arguments:
        -`input`: Sequence of bam file paths
        -`output`: Path of the resulting bam file
    """
    if len(input) > 1:
        inFile = " ".join(input)
        myDir, baseFile = os.path.split(output)
        runCommand(' '.join(['samtools merge', baseFile, inFile]), True)
        # os.path.split returns '' as directory for a bare file name. The
        # merged file is then already where it belongs, and `mv <file> `
        # without a destination would fail, so only move when needed.
        if myDir:
            runCommand(' '.join(['mv', baseFile, myDir]), True)
        return
    inFile = " ".join(input)
    runCommand(' '.join(['cp ', inFile, output]), True)
    return
Beispiel #10
0
def index_db_file(input, output, cpuNum, gtfFile):
    """Index the probe fasta file with STAR so it can be used as db file.

    The genome size is estimated as (length of the second line of the
    file) * (number of lines containing ``>``), and from it the value for
    ``--genomeSAindexNbases`` is derived as
    ``min(14, log2(genomeSize) / 2 - 1)``, rounded to an integer.
    Intermediate values are printed. STAR output is appended to
    ``index_STAR_genomeFile.log`` inside the output folder.

    Arguments:
        -`input`: The probe fasta file
        -`output`: The folder that receives the STAR genome index
        -`cpuNum`: Number of threads for ``--runThreadN``
        -`gtfFile`: Annotation file passed to ``--sjdbGTFfile``
    """
    # Only the first two lines are read; the second one is taken as a
    # representative probe sequence (assumes all probes share its length).
    with open(input) as myfile:
        head = [next(myfile) for _ in range(2)]
    seq = head[1].strip()
    seqLen = len(seq)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(seqLen)
    print(input)
    cmd = [
        'grep',
        '">"',
        input,
        '| wc -l',
    ]
    cmd = '  '.join(cmd)
    # runCommand is expected to return a sequence whose first item is the
    # textual output of the command (here: the probe count).
    totalProbes = runCommand(cmd, True)
    totalProbes = totalProbes[0].strip()
    print(totalProbes)
    genomeSize = int(seqLen) * int(totalProbes)
    print(genomeSize)
    scale_factor = np.fmin(14, np.log2(genomeSize) / 2 - 1)
    scale_factor = np.round(scale_factor).astype(int)
    print(scale_factor)
    cmds = [
        'STAR',
        '--runMode genomeGenerate',
        '--genomeDir', output,
        '--genomeFastaFiles', input,
        '--sjdbGTFfile', gtfFile,
        '--sjdbGTFfeatureExon exon',
        '--runThreadN', str(cpuNum),
        '--genomeSAindexNbases', str(scale_factor),
    ]
    cmds = '  '.join(cmds)
    cmds += " 2>&1 | tee -a " + output + "/index_STAR_genomeFile.log"
    runCommand(cmds, True)
    return
Beispiel #11
0
def trimFastq(input, output, trimLeft, proFastq):
    """Clean a fastq file using ``trim_galore``.

    Arguments:
        -`input`: The fastq file to clean
        -`output`: Not used by this function
        -`trimLeft`: Value passed to ``--clip_R1``
        -`proFastq`: Value passed to ``-o`` of trim_galore
    """
    quality_opts = ['-q', '20', '--stringency', '5']
    trimming_opts = ['--trim1', '--clip_R1', str(trimLeft), '--trim-n']
    format_opts = ['--phred33', '--gzip', '--illumina']
    io_args = [input, '-o', proFastq]
    galore_call = ['trim_galore']
    for group in (quality_opts, trimming_opts, format_opts, io_args):
        galore_call.extend(group)
    runCommand('  '.join(galore_call), True)
    return
Beispiel #12
0
def map_seq_to_probes(fastq, genomeDir, numCPU, outPrefix):
    """Map reads to the probes genome with STAR.

    The SAM output carries the ``NH HI AS nM`` attributes.

    Arguments:
        -`fastq`: Reads file passed to ``--readFilesIn`` (read through zcat)
        -`genomeDir`: STAR genome directory
        -`numCPU`: Number of threads for ``--runThreadN``
        -`outPrefix`: Value for ``--outFileNamePrefix``
    """
    # Options that depend on the call arguments.
    per_run = [
        'STAR',
        '--genomeDir', genomeDir,
        '--readFilesIn', fastq,
        '--readFilesCommand zcat',
        '--runThreadN ', str(numCPU),
        '--outFileNamePrefix', outPrefix,
    ]
    # Fixed alignment/output settings.
    fixed = [
        '--outSAMtype SAM',
        '--scoreDelOpen -10000',
        '--scoreInsOpen -10000',
        '--outFilterMismatchNmax 2',
        '--outSAMunmapped Within',
        '--outSAMattributes NH HI AS nM',
        ' --genomeLoad NoSharedMemory',
    ]
    runCommand('  '.join(per_run + fixed), True)
Beispiel #13
0
def split_well_barcode(wellbarcode, fastqFile, inputDir, prefixWell, suffix):
    """Split a fastq file by well barcode with ``fastx_barcode_splitter.pl``.

    The fastq file is piped into the splitter, which is called with
    ``--bol``, ``--partial 1`` and ``--mismatches 1``. The command output is
    also written to ``<prefixWell>wellsplit_barcode.log`` through ``tee``.

    Arguments:
        -`wellbarcode`: Barcode file passed to ``--bcfile``
        -`fastqFile`: The fastq file to split
        -`inputDir`: Not used by this function
        -`prefixWell`: Value for ``--prefix``; also prefixes the log file name
        -`suffix`: Value for ``--suffix``
    """
    log_path = prefixWell + "wellsplit_barcode.log"
    splitter_opts = [
        '--bcfile', wellbarcode,
        '--prefix', prefixWell,
        '--suffix', suffix,
        '--bol',
        '--partial', '1',
        '--mismatches', '1',
    ]
    feed = ['cat', fastqFile, '|', 'fastx_barcode_splitter.pl']
    pipeline = '  '.join(feed + splitter_opts)
    runCommand(pipeline + " 2>&1 | tee   " + log_path, True)
    return
Beispiel #14
0
def index_bam_file(bamfile):
    """Index a bam file with ``samtools index``.

    Arguments:
        -`bamfile`: The bam file to index
    """
    template = "samtools index %s"
    runCommand(template % (bamfile), True)
Beispiel #15
0
def sortBamFile(bamfile, outSuffix):
    """Sort a bam file with ``samtools sort -m 1000000000``.

    Arguments:
        -`bamfile`: The bam file to sort
        -`outSuffix`: Second positional argument of ``samtools sort``
          (presumably the output prefix of the legacy samtools syntax;
          verify against the installed samtools version)
    """
    template = "samtools  sort -m 1000000000  %s  %s"
    sort_call = template % (bamfile, outSuffix)
    runCommand(sort_call, True)
Beispiel #16
0
def convertSamToBam(samfile, bamfileout):
    """Convert a sam file to bam with ``samtools view -b -S``.

    Arguments:
        -`samfile`: The sam file to convert
        -`bamfileout`: File that receives the redirected bam output
    """
    template = "samtools view -b -S %s > %s"
    view_call = template % (samfile, bamfileout)
    runCommand(view_call, True)
Beispiel #17
0
def countReadsMappedToProbes(bamindex, outfile):
    """Write the ``samtools idxstats`` report of an alignment to a file.

    Arguments:
        -`bamindex`: File given to ``samtools idxstats``
        -`outfile`: File that receives the redirected report
    """
    template = "samtools idxstats %s > %s "
    idxstats_call = template % (bamindex, outfile)
    runCommand(idxstats_call, True)
Beispiel #18
0
def countReadsMappedToProbes(bamindex, outfile):
    """Write the ``samtools idxstats`` report of an alignment to a file.

    Arguments:
        -`bamindex`: File given to ``samtools idxstats``
        -`outfile`: File that receives the redirected report
    """
    template = "samtools idxstats %s > %s "
    idxstats_call = template % (bamindex, outfile)
    runCommand(idxstats_call, True)
Beispiel #19
0
def index_bam_file(bamfile):
    """Index a bam file with ``samtools index``.

    Arguments:
        -`bamfile`: The bam file to index
    """
    template = "samtools index %s"
    runCommand(template % (bamfile), True)
Beispiel #20
0
def sortBamFile(bamfile, outSuffix):
    """Sort a bam file with ``samtools sort -m 1000000000``.

    Arguments:
        -`bamfile`: The bam file to sort
        -`outSuffix`: Second positional argument of ``samtools sort``
          (presumably the output prefix of the legacy samtools syntax;
          verify against the installed samtools version)
    """
    template = "samtools  sort -m 1000000000  %s  %s"
    sort_call = template % (bamfile, outSuffix)
    runCommand(sort_call, True)
Beispiel #21
0
def convertSamToBam(samfile, bamfileout):
    """Convert a sam file to bam with ``samtools view -b -S``.

    Arguments:
        -`samfile`: The sam file to convert
        -`bamfileout`: File that receives the redirected bam output
    """
    template = "samtools view -b -S %s > %s"
    view_call = template % (samfile, bamfileout)
    runCommand(view_call, True)