def tophatBam_BigWig(project_root):
    """
    Submit one track-building job per TopHat sample found under a project.

    Expected layout: <project_root>/tophat/<sampleName>/accepted_hits.bam

    Creates <project_root>/track and <project_root>/track_bin when they are
    missing. For every accepted_hits.bam matched by the glob, the command
    list returned by cmds_bamToBigWig(sampleName, track_dir, bam) is passed
    to cmd.generate_submit_pbs with binName "<sampleName>_track" and
    binPath set to the track_bin directory.

    :param project_root: project directory that contains the tophat/ folder
    :return: None
    """
    track_dir = os.path.join(project_root, "track")
    track_bin = os.path.join(project_root, "track_bin")
    for folder in (track_dir, track_bin):
        if not os.path.exists(folder):
            os.mkdir(folder)

    pattern = os.path.join(project_root, "tophat/*", "accepted_hits.bam")
    for bam_path in glob.glob(pattern):
        # The sample name is the directory that directly holds the .bam file.
        sample = bam_path.split("/")[-2]
        job_cmds = cmds_bamToBigWig(sample, track_dir, bam_path)
        job_name = sample + "_track"
        cmd.generate_submit_pbs(cmds=job_cmds, binName=job_name, binPath=track_bin)
def main():
    """
    Submit one job per (sample, annotation feature) pair.

    Reads the project directory from sys.argv[1] and lists every entry of
    <project>/tophat as a sample. For each sample and each feature name in
    gtfBeds, the commands returned by
    map_gtf_bed(<sample>/sortedBed, feature, sample) are passed to
    cmd.generate_submit_pbs with binName "<sample>_<feature>" and binPath
    <project>/tophat/<sample>/bins.

    Example argument: /archive2/tmhyxb9/FBL/fastq/rmUMI/MAPPING_EXON/tophat_exon/

    :raises IndexError: if no command-line argument is given
    """
    # os.path.join rather than string concatenation: the argument now works
    # with or without a trailing "/" (the result is unchanged when it has one).
    projectDir = os.path.join(sys.argv[1], "tophat")
    samples = os.listdir(projectDir)
    gtfBeds = ["CDS", "5UTR", "3UTR", "exon", "intron", "whole"]
    for sample in samples:
        inputDir = os.path.join(projectDir, sample, "sortedBed")
        # The bin directory is the same for every feature of a sample.
        binPath = os.path.join(projectDir, sample, "bins")
        for gtf in gtfBeds:
            cmds = map_gtf_bed(inputDir, gtf, sample)
            binName = sample + "_" + gtf
            cmd.generate_submit_pbs(cmds, binName, binPath)
def main():
    """
    Submit one "rmUMI_<name>" job for each hard-coded FASTQ file.

    <name> is the file's base name up to its first ".". The commands come
    from generate_cmds(fastq, outputDir) and are handed to
    cmd.generate_submit_pbs together with the fixed bin directory.
    """
    output_dir = "/archive2/tmhyxb9/FBL/fastq/rmUMI/fastq/"
    bin_dir = "/archive2/tmhyxb9/FBL/fastq/rmUMI/bin"
    fastq_files = [
        "/archive2/tmhyxb9/FBL/fastq/Ctr2_sh/control2.fq",
        "/archive2/tmhyxb9/FBL/fastq/FBL_sh/FBL.fq",
        "/archive2/tmhyxb9/FBL/fastq/FBL2_sh/FBL2.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH2_sh1/EZHsh1_1.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH22_sh1/EZHsh1_2.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH2_sh2/EZHsh2_1.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH22_sh2/EZHsh2_2.fq",
    ]
    for fastq in fastq_files:
        # Keep only the part of the file name before its first dot.
        stem = os.path.basename(fastq).partition(".")[0]
        job_name = "rmUMI_" + stem
        job_cmds = generate_cmds(fastq, output_dir)
        cmd.generate_submit_pbs(cmds=job_cmds, binName=job_name, binPath=bin_dir)
def main():
    """
    Submit one "<sample>_bam2bed" job per sample directory.

    Reads the project directory from sys.argv[1] and lists every entry of
    <project>/tophat as a sample. For each sample it creates the
    sortedBed/ and bins/ sub-directories when they are missing, builds the
    commands with generate_cmds(inputDir=..., outputDir=...) and passes
    them to cmd.generate_submit_pbs.

    Example argument: /archive2/tmhyxb9/FBL/fastq/rmUMI/MAPPING_EXON/tophat_exon/

    :raises IndexError: if no command-line argument is given
    """
    # os.path.join rather than string concatenation: the argument now works
    # with or without a trailing "/" (the result is unchanged when it has one).
    projectDir = os.path.join(sys.argv[1], "tophat")
    samples = os.listdir(projectDir)
    for sample in samples:
        inputDir = os.path.join(projectDir, sample)

        outputDir = os.path.join(inputDir, "sortedBed")
        if not os.path.exists(outputDir):
            os.mkdir(outputDir)

        cmds = generate_cmds(inputDir=inputDir, outputDir=outputDir)

        binPath = os.path.join(inputDir, "bins")
        if not os.path.exists(binPath):
            os.mkdir(binPath)

        # NOTE(review): binName is a full path here (binPath is already
        # joined in). Left unchanged because generate_submit_pbs is not
        # visible from this file - confirm that it accepts a path as binName.
        binName = os.path.join(binPath, sample + "_bam2bed")
        cmd.generate_submit_pbs(cmds, binName, binPath)
import pandas as pd
import os, os.path, sys

# Put the ToolBox directory first on the search path before importing cmd.
sys.path.insert(0, "/archive2/tmhyxb9/ToolBox")
import cmd

# Submit one job per sample: sort and index the TopHat BAM with samtools,
# then run htseq-count against the knownGene GTF.
samples = ["Control", "FBL", "EZH2sh1", "EZH2sh2"]
geneGTF = "/archive2/tmhyxb9/ref_data/hg19/hg19.ucscgenes.knowngene.gtf"
binPath = "/archive2/tmhyxb9/FBL/RNA_seq/tophat_pair/HTseq_RNAseq/bin"

for sample in samples:
    # NOTE(review): there is no separator between "HTseq_RNAseq_results" and
    # the sample name, so the file is e.g. "HTseq_RNAseq_resultsControl_...".
    # Kept exactly as written - confirm whether a "/" or "_" was intended.
    outputFile = (
        "/archive2/tmhyxb9/FBL/RNA_seq/tophat_pair/HTseq_RNAseq/HTseq_RNAseq_results"
        + sample
        + "_RNAC_raw_count.txt"
    )
    cmds = [
        "cd /archive2/tmhyxb9/FBL/RNA_seq/tophat_pair/tophat/" + sample,
        "module load samtools/1.9",
        "samtools sort accepted_hits.bam > accepted_hits.sorted.bam",
        "samtools index accepted_hits.sorted.bam",
        "module load python/2.7.11",
        "htseq-count -f bam accepted_hits.sorted.bam -s no -m intersection-nonempty "
        + geneGTF
        + " > "
        + outputFile,
    ]
    cmd.generate_submit_pbs(cmds=cmds, binName=sample, binPath=binPath)