def convertPs(psfile):
    """Convert a PostScript file to PDF with ``ps2pdf`` (test utility).

    Nothing is done when ``psfile`` is not an existing regular file.

    Arguments:
    - `psfile`: path of the PostScript file to convert
    """
    if not os.path.isfile(psfile):
        return
    # Unlike the other helpers in this file, the second argument handed to
    # runCommand here is the string "T" rather than True.
    runCommand("ps2pdf %s" % psfile, "T")
def trim_Ad1_Ad2(input_fq, trimleft, trimright, output_fq):
    """Trim the TEMPOseq adaptors off both ends of a FASTQ file with seqtk.

    Runs ``seqtk trimfq`` on ``input_fq`` and redirects its standard output
    to ``output_fq``. Both the 3' and 5' adaptors are 17 nt long.

    Arguments:
    - `input_fq`: path of the FASTQ file to trim
    - `trimleft`: value given to seqtk's ``-b`` option
    - `trimright`: value given to seqtk's ``-e`` option
    - `output_fq`: path the trimmed reads are redirected to
    """
    pieces = [
        'seqtk trimfq',
        '-b ', str(trimleft),
        '-e ', str(trimright),
        input_fq,
        '> ', output_fq,
    ]
    runCommand(' '.join(pieces), True)
def createQuality(input, output):
    """Run FastQC on a FASTQ file and keep a log of the run.

    The combined stdout/stderr of ``fastqc`` is shown through ``tee`` and
    appended to ``<output>/analysis_quality.log``.

    Arguments:
    - `input`: the FASTQ file to check
    - `output`: the folder passed to ``fastqc -o``; it also receives the log
    """
    command = ' '.join(['fastqc', input, '-o', output])
    log_redirect = " 2>&1 | tee -a " + output + "/analysis_quality.log"
    runCommand(command + log_redirect, True)
def split_well_barcode(wellbarcode, fastqFile, inputDir, prefixWell, suffix):
    """Split a FASTQ file by well barcode with ``fastx_barcode_splitter.pl``.

    The FASTQ file is piped through the splitter with ``--bol``,
    ``--partial 1`` and ``--mismatches 1``. The splitter's combined
    stdout/stderr is written via ``tee`` to
    ``<prefixWell>wellsplit_barcode.log``.

    NOTE(review): a second definition of this function appears later in this
    file; being later, that one is the binding in effect after import.

    Arguments:
    - `wellbarcode`: barcode file handed to ``--bcfile``
    - `fastqFile`: FASTQ file to split
    - `inputDir`: not used by this function
    - `prefixWell`: value for ``--prefix``; also the prefix of the log file
    - `suffix`: value for ``--suffix``
    """
    splitter_args = [
        'cat', fastqFile, '|',
        'fastx_barcode_splitter.pl',
        '--bcfile', wellbarcode,
        '--prefix', prefixWell,
        '--suffix', suffix,
        '--bol',
        '--partial', str(1),
        '--mismatches', str(1),
    ]
    log_file = prefixWell + "wellsplit_barcode.log"
    command = ' '.join(splitter_args) + " 2>&1 | tee " + log_file
    runCommand(command, True)
def split_plate_barcode(barcodes, fastqFile, outDir, prefixPlate, suffix):
    """Split a FASTQ file by plate barcode with ``fastx_barcode_splitter.pl``.

    The FASTQ file is piped through the splitter with ``--eol``,
    ``--partial 1`` and ``--mismatches 1``. The splitter's combined
    stdout/stderr is written via ``tee`` to
    ``<prefixPlate>platesplit_barcode.log``.

    Arguments:
    - `barcodes`: sequence whose first element is the plate barcode file
      handed to ``--bcfile``; other elements are ignored here
    - `fastqFile`: FASTQ file to split
    - `outDir`: not used by this function
    - `prefixPlate`: value for ``--prefix``; also the prefix of the log file
    - `suffix`: value for ``--suffix``
    """
    plate_barcode_file = barcodes[0]
    splitter_args = [
        'cat', fastqFile, '|',
        'fastx_barcode_splitter.pl',
        '--bcfile', plate_barcode_file,
        '--prefix', prefixPlate,
        '--suffix', suffix,
        '--eol',
        '--partial', str(1),
        '--mismatches', str(1),
    ]
    log_file = prefixPlate + "platesplit_barcode.log"
    command = ' '.join(splitter_args) + " 2>&1 | tee " + log_file
    runCommand(command, True)
def map_seq_to_probes(fastq, genomeDir, numCPU, outPrefix):
    """Align reads to the probe genome index using STAR.

    Reads are decompressed with ``zcat``, output is written as SAM with
    unmapped reads kept in the file, at most 2 mismatches are allowed and
    the gap-open scores are set to -10000.

    NOTE(review): a second definition of this function appears later in this
    file and is the binding in effect after import. It differs from this one
    only in requesting ``--outSAMattributes NH HI AS nM`` instead of
    ``AS nM`` — confirm which variant is intended.

    Arguments:
    - `fastq`: reads file given to ``--readFilesIn``
    - `genomeDir`: STAR index directory
    - `numCPU`: thread count for ``--runThreadN``
    - `outPrefix`: value for ``--outFileNamePrefix``
    """
    star_args = [
        'STAR',
        '--genomeDir', genomeDir,
        '--readFilesIn', fastq,
        '--readFilesCommand zcat',
        '--runThreadN ', str(numCPU),
        '--outFileNamePrefix', outPrefix,
        '--outSAMtype SAM',
        '--scoreDelOpen -10000',
        '--scoreInsOpen -10000',
        '--outFilterMismatchNmax 2',
        '--outSAMunmapped Within',
        '--outSAMattributes AS nM',
        ' --genomeLoad NoSharedMemory',
    ]
    runCommand(' '.join(star_args), True)
def index_db_file(input, output, cpuNum, gtfFile):
    """Build a STAR genome index from the probe FASTA file.

    The genome size is estimated as (length of the second line of the file,
    i.e. the first sequence line) x (number of lines containing ``>``).
    ``--genomeSAindexNbases`` is then set to
    ``min(14, log2(genomeSize) / 2 - 1)`` rounded to an integer. STAR's
    combined stdout/stderr is appended via ``tee`` to
    ``<output>/index_STAR_genomeFile.log``.

    NOTE(review): a second definition of this function appears later in this
    file; being later, that one is the binding in effect after import.

    Arguments:
    - `input`: probe FASTA file
    - `output`: directory given to ``--genomeDir``; also receives the log
    - `cpuNum`: thread count for ``--runThreadN``
    - `gtfFile`: annotation file given to ``--sjdbGTFfile``

    Raises:
    - `ValueError`: when the file has no sequence on its second line, or
      when the estimated genome size is not positive. Either case would
      otherwise feed ``log2(0)`` into the index-size computation.
    """
    with open(input) as myfile:
        myfile.readline()  # skip the header line of the first record
        # readline() returns '' at end of file, so a short file yields an
        # empty sequence instead of an uncaught StopIteration.
        seq = myfile.readline().strip()
    seqLen = len(seq)
    if seqLen == 0:
        # Fail before shelling out: the genome size would be zero anyway.
        raise ValueError(
            "no sequence found on the second line of %r" % (input,))
    print(seqLen)
    print(input)

    cmd = ' '.join([
        'grep', '">"',
        input,
        '| wc -l',
    ])
    # Presumably runCommand returns a sequence whose first item is the
    # command's text output — verify against runCommand's definition.
    totalProbes = runCommand(cmd, True)
    totalProbes = totalProbes[0].strip()
    print(totalProbes)

    genomeSize = int(seqLen) * int(totalProbes)
    print(genomeSize)
    if genomeSize <= 0:
        raise ValueError(
            "estimated genome size for %r is %d; cannot derive "
            "--genomeSAindexNbases" % (input, genomeSize))

    scale_factor = np.fmin(14, np.log2(genomeSize) / 2 - 1)
    scale_factor = np.round(scale_factor).astype(int)
    print(scale_factor)

    cmds = ' '.join([
        'STAR',
        '--runMode genomeGenerate',
        '--genomeDir', output,
        '--genomeFastaFiles', input,
        '--sjdbGTFfile', gtfFile,
        '--sjdbGTFfeatureExon exon',
        '--runThreadN', str(cpuNum),
        '--genomeSAindexNbases', str(scale_factor),
    ])
    cmds += " 2>&1 | tee -a " + output + "/index_STAR_genomeFile.log"
    runCommand(cmds, True)
    return
def count_mapped(bamFile, outfile, gtfFile):
    """Count reads per genome feature with ``htseq-count``.

    Uses the ``intersection-nonempty`` overlap mode (``union`` is the other
    mode the original author noted), counts ``exon`` features grouped by
    ``gene_id``, treats the data as unstranded (``-s no``) and passes
    ``-a 10``. Results are redirected to ``outfile``.

    Arguments:
    - `bamFile`: alignment file to count; despite the name it is passed
      with ``-f sam``
    - `outfile`: path the counts are redirected to
    - `gtfFile`: annotation file with the features
    """
    htseq_args = [
        'htseq-count',
        '-f sam',
        '-s no',
        '-a 10',
        '-t exon',
        '-i gene_id',
        '-m intersection-nonempty',
        bamFile,
        gtfFile,
        '>', outfile,
    ]
    runCommand(' '.join(htseq_args), True)
def sameTissueBamMerge(input, output):
    """Merge several alignment files into ``output``, or copy a single one.

    With more than one input, ``samtools merge`` writes a file named like
    the basename of ``output`` into the current working directory, and that
    file is then moved into ``output``'s directory. With exactly one input
    the file is copied to ``output`` with ``cp``.

    Arguments:
    - `input`: sequence of input file paths
    - `output`: path of the merged (or copied) file

    Raises:
    - `ValueError`: when ``input`` is empty, since there is nothing to
      merge or copy and the resulting ``cp`` command would be malformed
    """
    if len(input) == 0:
        raise ValueError("sameTissueBamMerge needs at least one input file")

    inFile = " ".join(input)
    if len(input) > 1:
        myDir, baseFile = os.path.split(output)
        runCommand(' '.join(['samtools merge', baseFile, inFile]), True)
        # os.path.split() gives an empty directory when `output` has no
        # directory part. The merged file is then already at `output`, and
        # "mv <file> " with no destination would be an invalid command.
        if myDir:
            runCommand(' '.join(['mv', baseFile, myDir]), True)
        return

    runCommand(' '.join(['cp ', inFile, output]), True)
    return
def index_db_file(input, output, cpuNum, gtfFile):
    """Build a STAR genome index from the probe FASTA file.

    The genome size is estimated as (length of the second line of the file,
    i.e. the first sequence line) x (number of lines containing ``>``).
    ``--genomeSAindexNbases`` is then set to
    ``min(14, log2(genomeSize) / 2 - 1)`` rounded to an integer. STAR's
    combined stdout/stderr is appended via ``tee`` to
    ``<output>/index_STAR_genomeFile.log``.

    NOTE(review): an earlier definition with the same name exists in this
    file; being later, this one is the binding in effect after import.

    Arguments:
    - `input`: probe FASTA file
    - `output`: directory given to ``--genomeDir``; also receives the log
    - `cpuNum`: thread count for ``--runThreadN``
    - `gtfFile`: annotation file given to ``--sjdbGTFfile``

    Raises:
    - `ValueError`: when the file has no sequence on its second line, or
      when the estimated genome size is not positive. Either case would
      otherwise feed ``log2(0)`` into the index-size computation.
    """
    with open(input) as myfile:
        myfile.readline()  # skip the header line of the first record
        # readline() returns '' at end of file, so a short file yields an
        # empty sequence instead of an uncaught StopIteration.
        seq = myfile.readline().strip()
    seqLen = len(seq)
    if seqLen == 0:
        # Fail before shelling out: the genome size would be zero anyway.
        raise ValueError(
            "no sequence found on the second line of %r" % (input,))
    print(seqLen)
    print(input)

    count_cmd = [
        'grep', '">"',
        input,
        '| wc -l',
    ]
    # Presumably runCommand returns a sequence whose first item is the
    # command's text output — verify against runCommand's definition.
    totalProbes = runCommand(' '.join(count_cmd), True)
    totalProbes = totalProbes[0].strip()
    print(totalProbes)

    genomeSize = int(seqLen) * int(totalProbes)
    print(genomeSize)
    if genomeSize <= 0:
        raise ValueError(
            "estimated genome size for %r is %d; cannot derive "
            "--genomeSAindexNbases" % (input, genomeSize))

    scale_factor = np.fmin(14, np.log2(genomeSize) / 2 - 1)
    scale_factor = np.round(scale_factor).astype(int)
    print(scale_factor)

    star_args = [
        'STAR',
        '--runMode genomeGenerate',
        '--genomeDir', output,
        '--genomeFastaFiles', input,
        '--sjdbGTFfile', gtfFile,
        '--sjdbGTFfeatureExon exon',
        '--runThreadN', str(cpuNum),
        '--genomeSAindexNbases', str(scale_factor),
    ]
    cmds = ' '.join(star_args)
    cmds += " 2>&1 | tee -a " + output + "/index_STAR_genomeFile.log"
    runCommand(cmds, True)
    return
def trimFastq(input, output, trimLeft, proFastq):
    """Clean a FASTQ file with ``trim_galore``.

    Runs with quality cutoff 20, stringency 5, ``--trim1``, ``--trim-n``,
    Phred+33 encoding, gzip-compressed output and the Illumina adapter
    preset.

    Arguments:
    - `input`: FASTQ file to clean
    - `output`: not used by this function
    - `trimLeft`: value given to ``--clip_R1``
    - `proFastq`: value given to trim_galore's ``-o`` option
    """
    galore_args = [
        'trim_galore',
        '-q', str(20),
        '--stringency', str(5),
        '--trim1',
        '--clip_R1', str(trimLeft),
        '--trim-n',
        '--phred33',
        '--gzip',
        '--illumina',
        input,
        '-o', proFastq,
    ]
    runCommand(' '.join(galore_args), True)
def map_seq_to_probes(fastq, genomeDir, numCPU, outPrefix):
    """Align reads to the probe genome index using STAR.

    Reads are decompressed with ``zcat``, output is written as SAM with
    unmapped reads kept in the file, at most 2 mismatches are allowed, the
    gap-open scores are set to -10000, and the ``NH HI AS nM`` SAM
    attributes are requested.

    NOTE(review): an earlier definition with the same name exists in this
    file (requesting only ``AS nM``); being later, this one is the binding
    in effect after import.

    Arguments:
    - `fastq`: reads file given to ``--readFilesIn``
    - `genomeDir`: STAR index directory
    - `numCPU`: thread count for ``--runThreadN``
    - `outPrefix`: value for ``--outFileNamePrefix``
    """
    options = [
        'STAR',
        '--genomeDir', genomeDir,
        '--readFilesIn', fastq,
        '--readFilesCommand zcat',
        '--runThreadN ', str(numCPU),
        '--outFileNamePrefix', outPrefix,
        '--outSAMtype SAM',
        '--scoreDelOpen -10000',
        '--scoreInsOpen -10000',
        '--outFilterMismatchNmax 2',
        '--outSAMunmapped Within',
        '--outSAMattributes NH HI AS nM',
        ' --genomeLoad NoSharedMemory',
    ]
    command_line = ' '.join(options)
    runCommand(command_line, True)
def split_well_barcode(wellbarcode, fastqFile, inputDir, prefixWell, suffix):
    """Split a FASTQ file by well barcode with ``fastx_barcode_splitter.pl``.

    The FASTQ file is piped through the splitter with ``--bol``,
    ``--partial 1`` and ``--mismatches 1``. The splitter's combined
    stdout/stderr is written via ``tee`` to
    ``<prefixWell>wellsplit_barcode.log``.

    NOTE(review): an earlier definition with the same name exists in this
    file; being later, this one is the binding in effect after import.

    Arguments:
    - `wellbarcode`: barcode file handed to ``--bcfile``
    - `fastqFile`: FASTQ file to split
    - `inputDir`: not used by this function
    - `prefixWell`: value for ``--prefix``; also the prefix of the log file
    - `suffix`: value for ``--suffix``
    """
    pipeline = [
        'cat', fastqFile, '|',
        'fastx_barcode_splitter.pl',
        '--bcfile', wellbarcode,
        '--prefix', prefixWell,
        '--suffix', suffix,
        '--bol',
        '--partial', str(1),
        '--mismatches', str(1),
    ]
    log_path = prefixWell + "wellsplit_barcode.log"
    shell_line = ' '.join(pipeline)
    shell_line = shell_line + " 2>&1 | tee " + log_path
    runCommand(shell_line, True)
def index_bam_file(bamfile):
    """Create an index for a BAM file by running ``samtools index``.

    NOTE(review): a second definition of this function appears later in this
    file; being later, that one is the binding in effect after import.

    Arguments:
    - `bamfile`: path of the BAM file to index
    """
    runCommand("samtools index %s" % bamfile, True)
def sortBamFile(bamfile, outSuffix):
    """Sort a BAM file with ``samtools sort -m 1000000000``.

    NOTE(review): a second definition of this function appears later in this
    file; being later, that one is the binding in effect after import.
    NOTE(review): the two-positional-argument form used here looks like the
    legacy ``samtools sort <in.bam> <out.prefix>`` syntax — confirm against
    the installed samtools version.

    Arguments:
    - `bamfile`: path of the BAM file to sort
    - `outSuffix`: second positional argument given to ``samtools sort``
    """
    sort_args = (bamfile, outSuffix)
    runCommand("samtools sort -m 1000000000 %s %s" % sort_args, True)
def convertSamToBam(samfile, bamfileout):
    """Convert a SAM file to BAM using ``samtools view -b -S``.

    NOTE(review): a second definition of this function appears later in this
    file; being later, that one is the binding in effect after import.

    Arguments:
    - `samfile`: path of the SAM file to read
    - `bamfileout`: path the BAM output is redirected to
    """
    paths = (samfile, bamfileout)
    runCommand("samtools view -b -S %s > %s" % paths, True)
def countReadsMappedToProbes(bamindex, outfile):
    """Write the output of ``samtools idxstats`` for a file to ``outfile``.

    NOTE(review): a second definition of this function appears later in this
    file; being later, that one is the binding in effect after import.

    Arguments:
    - `bamindex`: file handed to ``samtools idxstats``
    - `outfile`: path the statistics are redirected to
    """
    paths = (bamindex, outfile)
    runCommand("samtools idxstats %s > %s " % paths, True)
def countReadsMappedToProbes(bamindex, outfile):
    """Dump per-reference statistics from ``samtools idxstats`` to a file.

    NOTE(review): an earlier definition with the same name exists in this
    file; being later, this one is the binding in effect after import.

    Arguments:
    - `bamindex`: file handed to ``samtools idxstats``
    - `outfile`: path the statistics are redirected to
    """
    idxstats_template = "samtools idxstats %s > %s "
    command = idxstats_template % (bamindex, outfile)
    runCommand(command, True)
def index_bam_file(bamfile):
    """Run ``samtools index`` on a BAM file.

    NOTE(review): an earlier definition with the same name exists in this
    file; being later, this one is the binding in effect after import.

    Arguments:
    - `bamfile`: path of the BAM file to index
    """
    index_template = "samtools index %s"
    command = index_template % bamfile
    runCommand(command, True)
def sortBamFile(bamfile, outSuffix):
    """Run ``samtools sort -m 1000000000`` on a BAM file.

    NOTE(review): an earlier definition with the same name exists in this
    file; being later, this one is the binding in effect after import.
    NOTE(review): the two-positional-argument form used here looks like the
    legacy ``samtools sort <in.bam> <out.prefix>`` syntax — confirm against
    the installed samtools version.

    Arguments:
    - `bamfile`: path of the BAM file to sort
    - `outSuffix`: second positional argument given to ``samtools sort``
    """
    sort_template = "samtools sort -m 1000000000 %s %s"
    command = sort_template % (bamfile, outSuffix)
    runCommand(command, True)
def convertSamToBam(samfile, bamfileout):
    """Turn a SAM file into a BAM file via ``samtools view -b -S``.

    NOTE(review): an earlier definition with the same name exists in this
    file; being later, this one is the binding in effect after import.

    Arguments:
    - `samfile`: path of the SAM file to read
    - `bamfileout`: path the BAM output is redirected to
    """
    view_template = "samtools view -b -S %s > %s"
    command = view_template % (samfile, bamfileout)
    runCommand(command, True)