def tophatBam_BigWig(project_root):
    """
    Hand one bam-to-bigWig job per TopHat sample to cmd.generate_submit_pbs.

    Expected layout: <project_root>/tophat/<sampleName>/accepted_hits.bam
    The directories <project_root>/track and <project_root>/track_bin are
    created when they do not exist yet.

    :param project_root: project root directory containing "tophat/"
    :return: None; the commands come from cmds_bamToBigWig and are passed on
             to cmd.generate_submit_pbs
    """
    track_dir = os.path.join(project_root, "track")
    track_bin = os.path.join(project_root, "track_bin")
    for needed_dir in (track_dir, track_bin):
        if not os.path.exists(needed_dir):
            os.mkdir(needed_dir)

    # One accepted_hits.bam is expected per sample directory under tophat/.
    bam_pattern = os.path.join(project_root, "tophat/*", "accepted_hits.bam")
    for bam_path in glob.glob(bam_pattern):
        # The sample name is the directory directly above the .bam file.
        path_parts = bam_path.split("/")
        sample = path_parts[-2]
        job_cmds = cmds_bamToBigWig(sample, track_dir, bam_path)
        cmd.generate_submit_pbs(
            cmds=job_cmds,
            binName=sample + "_track",
            binPath=track_bin)
def main():
    """
    Submit one job per (sample, GTF region) pair via cmd.generate_submit_pbs.

    The project directory is read from sys.argv[1]; every directory found
    under <project>/tophat is treated as a sample. For each sample the
    commands are built by map_gtf_bed from <sample>/sortedBed and handed to
    cmd.generate_submit_pbs with <sample>/bins as binPath.

    Exits with a usage message when no project directory is given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s <project_dir>" % sys.argv[0])

    # os.path.join works with or without a trailing slash on the argument,
    # e.g. /archive2/tmhyxb9/FBL/fastq/rmUMI/MAPPING_EXON/tophat_exon/
    projectDir = os.path.join(sys.argv[1], "tophat")
    gtfBeds = ["CDS", "5UTR", "3UTR", "exon", "intron", "whole"]

    for sample in sorted(os.listdir(projectDir)):
        sampleDir = os.path.join(projectDir, sample)
        # Stray files (logs, summaries) next to the sample folders are not
        # samples; skip them instead of submitting jobs for them.
        if not os.path.isdir(sampleDir):
            continue
        inputDir = os.path.join(sampleDir, "sortedBed")
        binPath = os.path.join(sampleDir, "bins")
        for gtf in gtfBeds:
            cmds = map_gtf_bed(inputDir, gtf, sample)
            binName = sample + "_" + gtf
            cmd.generate_submit_pbs(cmds, binName, binPath)
# Example #3
def main():
    """
    Submit one "rmUMI_<name>" job per hard-coded FASTQ file.

    For every input file the commands are built by generate_cmds(file,
    outputDir) and passed to cmd.generate_submit_pbs; <name> is the file's
    base name up to its first dot.
    """
    output_dir = "/archive2/tmhyxb9/FBL/fastq/rmUMI/fastq/"
    bin_dir = "/archive2/tmhyxb9/FBL/fastq/rmUMI/bin"
    fastq_files = [
        "/archive2/tmhyxb9/FBL/fastq/Ctr2_sh/control2.fq",
        "/archive2/tmhyxb9/FBL/fastq/FBL_sh/FBL.fq",
        "/archive2/tmhyxb9/FBL/fastq/FBL2_sh/FBL2.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH2_sh1/EZHsh1_1.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH22_sh1/EZHsh1_2.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH2_sh2/EZHsh2_1.fq",
        "/archive2/tmhyxb9/FBL/fastq/EZH22_sh2/EZHsh2_2.fq"
    ]

    for fastq_path in fastq_files:
        # Job name: file name without directory and without extension(s).
        stem = os.path.basename(fastq_path).split(".")[0]
        job_cmds = generate_cmds(fastq_path, output_dir)
        cmd.generate_submit_pbs(
            cmds=job_cmds,
            binName="rmUMI_" + stem,
            binPath=bin_dir)
# Example #4
def main():
    """
    Submit one "<sample>_bam2bed" job per sample directory.

    The project directory is read from sys.argv[1]; every directory found
    under <project>/tophat is treated as a sample. For each sample,
    <sample>/sortedBed and <sample>/bins are created when missing, the
    commands are built by generate_cmds(inputDir, outputDir) and handed to
    cmd.generate_submit_pbs.

    Exits with a usage message when no project directory is given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s <project_dir>" % sys.argv[0])

    # os.path.join works with or without a trailing slash on the argument,
    # e.g. /archive2/tmhyxb9/FBL/fastq/rmUMI/MAPPING_EXON/tophat_exon/
    projectDir = os.path.join(sys.argv[1], "tophat")

    for sample in sorted(os.listdir(projectDir)):
        inputDir = os.path.join(projectDir, sample)
        # os.mkdir below would fail underneath a plain file, so only real
        # sample directories are processed.
        if not os.path.isdir(inputDir):
            continue

        outputDir = os.path.join(inputDir, "sortedBed")
        binPath = os.path.join(inputDir, "bins")
        for neededDir in (outputDir, binPath):
            if not os.path.exists(neededDir):
                os.mkdir(neededDir)

        cmds = generate_cmds(inputDir=inputDir, outputDir=outputDir)

        # NOTE(review): binName is now a bare name because the directory is
        # already passed as binPath; previously the full path was passed as
        # binName too. Confirm generate_submit_pbs combines the two itself.
        binName = sample + "_bam2bed"
        cmd.generate_submit_pbs(cmds, binName, binPath)
# Example #5
import pandas as pd
import os, os.path, sys

sys.path.insert(0, "/archive2/tmhyxb9/ToolBox")
import cmd


# For each sample: sort and index the TopHat accepted_hits.bam, run
# htseq-count against the UCSC knownGene GTF, and hand the command list to
# cmd.generate_submit_pbs.
samples = ["Control", "FBL", "EZH2sh1", "EZH2sh2"]

# Identical for every sample, so defined once outside the loop.
geneGTF = "/archive2/tmhyxb9/ref_data/hg19/hg19.ucscgenes.knowngene.gtf"
binPath = "/archive2/tmhyxb9/FBL/RNA_seq/tophat_pair/HTseq_RNAseq/bin"

for sample in samples:
    # NOTE(review): there is no "/" between "HTseq_RNAseq_results" and the
    # sample name, so files are named HTseq_RNAseq_results<sample>_... inside
    # HTseq_RNAseq/. Confirm this is intended and not a missing separator.
    outputFile = "/archive2/tmhyxb9/FBL/RNA_seq/tophat_pair/HTseq_RNAseq/HTseq_RNAseq_results" + sample + "_RNAC_raw_count.txt"

    cmd0 = "cd /archive2/tmhyxb9/FBL/RNA_seq/tophat_pair/tophat/" + sample

    cmd1 = "module load samtools/1.9"
    # samtools sort orders by coordinate unless -n is given.
    cmd2 = "samtools sort accepted_hits.bam > accepted_hits.sorted.bam"
    cmd3 = "samtools index accepted_hits.sorted.bam"

    cmd4 = "module load python/2.7.11"

    # "-r pos" tells htseq-count the input is position-sorted. Its default is
    # name order, which mispairs mates when the BAM is coordinate-sorted; the
    # tophat_pair path suggests paired-end data (TODO confirm).
    cmd5 = "htseq-count -f bam -r pos accepted_hits.sorted.bam -s no -m intersection-nonempty " + geneGTF + " > " + outputFile

    cmds = [cmd0, cmd1, cmd2, cmd3, cmd4, cmd5]
    cmd.generate_submit_pbs(cmds=cmds, binName=sample, binPath=binPath)