Python chr_names_conv 예제들, bfx.genome.chr_names_conv Python 예제들

예제 #1

0

파일 보기

파일: hicseq.py 프로젝트: xuwei684/gsoc2019mugqic

    def interaction_matrices_Chr(self):
        """
        IntraChromosomal interaction matrices are produced by Homer at resolutions defined in the ini config file and plotted by HiCPlotter.
        For more detailed information about the HOMER matrices visit: [HOMER matrices] (http://homer.ucsd.edu/homer/interactions/HiCmatrices.html)
        For more detailed information about HiCPlotter visit: [HiCPlotter] (https://github.com/kcakdemir/HiCPlotter)

        The chromosomes and resolutions come from the [interaction_matrices_Chr] section of the config
        ("chromosomes" and the comma-separated "resolution_chr"). When "chromosomes" is "All", the list is
        obtained from genome.chr_names_conv() applied to the DEFAULT "genome_dictionary" file.

        Returns the list of jobs: for every sample, resolution and chromosome, one Homer matrix job
        followed by one HiCPlotter job.
        """

        jobs = []

        chrs = config.param('interaction_matrices_Chr', 'chromosomes')
        res_chr = config.param('interaction_matrices_Chr', 'resolution_chr').split(",")

        if chrs == "All":
            genome_dict = os.path.expandvars(config.param('DEFAULT', 'genome_dictionary', type='filepath'))
            chrs = genome.chr_names_conv(genome_dict)
        else:
            chrs = chrs.split(",")

        for sample in self.samples:
            tagDirName = "_".join(("HTD", sample.name, self.enzyme))
            homer_output_dir = os.path.join(self.output_dirs['homer_output_directory'], tagDirName)
            sample_output_dir_chr = os.path.join(self.output_dirs['matrices_output_directory'], sample.name, "chromosomeMatrices")

            # loop over chrs and res:
            for res in res_chr:
                # Resolution expressed in kb for the plot file names. Floor division keeps the value
                # an integer ("50", not "50.0") on Python 3, exactly as Python 2's integer "/" did.
                res_kb = str(int(res) // 1000)

                for chrom in chrs:

                    fileName = os.path.join(sample_output_dir_chr, "_".join((tagDirName, chrom, res, "raw.txt")))
                    fileNameRN = os.path.join(sample_output_dir_chr, "_".join((tagDirName, chrom, res, "rawRN.txt")))
                    # Shell-quoted pattern with a wildcard for the bin range
                    # (presumably the name HiCPlotter gives its output -- confirm against hicplotter.intra_chrom_matrix_plot).
                    fileNamePlot = os.path.join(sample_output_dir_chr, "".join((tagDirName, "_", chrom, "_", res, "_raw-", chrom, "\'.ofBins(0-\'*\')\'.", res_kb, "K.jpeg")))
                    newFileNamePlot = os.path.join(sample_output_dir_chr, "".join((tagDirName, "_", chrom, "_", res, "_raw-", chrom, ".all.", res_kb, "K.jpeg")))

                    jobMatrix = homer.hic_interactionMatrix_chr(sample.name, sample_output_dir_chr, homer_output_dir, res, chrom, fileName, fileNameRN)
                    jobMatrix.samples = [sample]

                    jobPlot = hicplotter.intra_chrom_matrix_plot(
                        fileNameRN,
                        sample.name,
                        chrom,
                        res,
                        os.path.join(sample_output_dir_chr, "_".join((tagDirName, chrom, res, "raw"))),
                        fileNamePlot,
                        newFileNamePlot
                    )
                    jobPlot.samples = [sample]

                    jobs.extend([jobMatrix, jobPlot])

        return jobs

예제 #2

0

파일 보기

파일: hicseq.py 프로젝트: xuwei684/gsoc2019mugqic

    def identify_TADs_TopDom(self):
        """
        Identify Topological Associating Domains (TADs) with TopDom, at each resolution listed in the ini config file.
        More information about TopDom: [TopDom] (https://www.ncbi.nlm.nih.gov/pubmed/26704975)

        Chromosomes and resolutions are read from the [identify_TADs] section ("chromosomes", "resolution_TADs").
        Returns the list of jobs: for every sample, resolution and chromosome, an input-preparation job
        followed by a TAD-calling job.
        """

        chromosomes = config.param('identify_TADs', 'chromosomes')
        resolutions = config.param('identify_TADs', 'resolution_TADs').split(",")

        # "All" means: take the chromosome names from the genome dictionary file.
        if chromosomes != "All":
            chromosomes = chromosomes.split(",")
        else:
            dictionary_path = os.path.expandvars(config.param('DEFAULT', 'genome_dictionary', type='filepath'))
            chromosomes = genome.chr_names_conv(dictionary_path)

        all_jobs = []

        for sample in self.samples:
            topdom_dir = os.path.join(self.output_dirs['TAD_output_directory'], sample.name, "TopDom")

            for resolution in resolutions:
                for chrom in chromosomes:

                    # Suffix shared by the job names and the generated R script name.
                    label = sample.name + "_" + chrom + "_res" + resolution

                    matrix_in = os.path.join(
                        self.output_dirs['matrices_output_directory'],
                        sample.name,
                        "chromosomeMatrices",
                        "_".join(("HTD", sample.name, self.enzyme, chrom, resolution, "rawRN.txt"))
                    )
                    matrix_tmp = matrix_in + ".MatA"
                    matrix_out = os.path.join(
                        topdom_dir,
                        "_".join(("HTD", sample.name, self.enzyme, chrom, resolution, "rawRN.MatA.TopDom"))
                    )
                    r_script = "identify_TADs_TopDom." + label + ".R"

                    # First job: make sure the output directory exists, then build the TopDom input.
                    prepare_job = concat_jobs(
                        [
                            Job(command="mkdir -p " + topdom_dir),
                            topdom.create_input(matrix_in, matrix_tmp, matrix_out, r_script, resolution)
                        ],
                        name="identify_TADs.TopDom_create_input." + label,
                        samples=[sample]
                    )

                    # Second job: the TopDom call itself.
                    call_job = topdom.call_TADs(matrix_tmp, matrix_out, r_script)
                    call_job.name = "identify_TADs.TopDom_call_TADs." + label
                    call_job.samples = [sample]

                    all_jobs.append(prepare_job)
                    all_jobs.append(call_job)

        return all_jobs

예제 #3

0

파일 보기

파일: hicseq.py 프로젝트: xuwei684/gsoc2019mugqic

    def identify_TADs_RobusTAD(self):
        """
        Topological associating Domain (TAD) scores are calculated using RobusTAD for every bin in the genome.
        RobusTAD is resolution-independent and will use the first resolution in "resolution_TADs"  under [identify_TADs] in the ini file.
        For more detailed information about the RobusTAD visit: [RobusTAD] (https://github.com/rdali/RobusTAD)

        When "chromosomes" under [identify_TADs] is "All", the chromosome list is obtained from
        genome.chr_names_conv() applied to the DEFAULT "genome_dictionary" file; otherwise it is the
        comma-separated list given in the config.

        Returns the list of jobs: one per sample and chromosome, each creating the sample's RobusTAD
        output directory and then running robustad.call_TADs on the chromosome's rawRN matrix.
        """

        jobs = []

        chrs = config.param('identify_TADs', 'chromosomes')
        # Only the first configured resolution is used.
        res = config.param('identify_TADs', 'resolution_TADs').split(",")[0]

        if chrs == "All":
            genome_dict = os.path.expandvars(config.param('DEFAULT', 'genome_dictionary', type='filepath'))
            chrs = genome.chr_names_conv(genome_dict)
        else:
            chrs = chrs.split(",")

        for sample in self.samples:
            sample_output_dir = os.path.join(self.output_dirs['TAD_output_directory'], sample.name, "RobusTAD")

            for chrom in chrs:

                input_matrix = os.path.join(self.output_dirs['matrices_output_directory'], sample.name, "chromosomeMatrices", "_".join(("HTD", sample.name, self.enzyme, chrom, res, "rawRN.txt")))

                # The job is built from the input matrix, output directory and resolution only;
                # no output file names are computed here.
                job = concat_jobs([
                    Job(command="mkdir -p " + sample_output_dir),
                    robustad.call_TADs(input_matrix, sample_output_dir, res)
                ])
                job.name = "identify_TADs.RobusTAD." + sample.name + "_" + chrom
                job.samples = [sample]

                jobs.append(job)

        return jobs

예제 #4

0

파일 보기

파일: hicseq.py 프로젝트: marquispa/test

    def identify_TADs(self):
        """
        Identify Topological Associating Domains (TADs) with TopDom, at each resolution listed in the ini config file.
        More information about TopDom: [TopDom] (https://www.ncbi.nlm.nih.gov/pubmed/26704975)

        Chromosomes and resolutions are read from the [identify_TADs] section ("chromosomes", "resolution_TADs").
        Returns the list of jobs: for every sample, resolution and chromosome, a job that writes the TopDom
        R script and builds the input matrix, followed by a job that runs the script.
        """

        chromosomes = config.param('identify_TADs', 'chromosomes')
        resolutions = config.param('identify_TADs', 'resolution_TADs').split(",")

        # "All" means: take the chromosome names from the genome dictionary file.
        if chromosomes != "All":
            chromosomes = chromosomes.split(",")
        else:
            dictionary_path = os.path.expandvars(
                config.param('DEFAULT', 'genome_dictionary', type='filepath'))
            chromosomes = genome.chr_names_conv(dictionary_path)

        all_jobs = []

        for sample in self.samples:
            tad_dir = os.path.join(self.output_dirs['TAD_output_directory'], sample.name)

            for resolution in resolutions:
                for chrom in chromosomes:

                    # Suffix shared by both job names and the R script name.
                    label = sample.name + "_" + chrom + "_res" + resolution

                    matrix_in = os.path.join(
                        self.output_dirs['matrices_output_directory'],
                        sample.name,
                        "chromosomeMatrices",
                        "_".join(("HTD", sample.name, self.enzyme, chrom, resolution, "rawRN.txt")))
                    matrix_tmp = matrix_in + ".MatA"
                    topdom_prefix = os.path.join(
                        tad_dir,
                        "_".join(("HTD", sample.name, self.enzyme, chrom, resolution, "rawRN.MatA.TopDom")))

                    # R code that sources the TopDom script and runs it on the temporary matrix.
                    r_code = """source(\\\"{script}\\\"); TopDom(matrix.file=\'{tmp_matrix}\', window.size={n}, outFile=\'{output_matrix}\')""".format(
                        script=os.path.expandvars("${R_TOOLS}/TopDom_v0.0.2.R"),
                        tmp_matrix=matrix_tmp,
                        n=config.param('identify_TADs', 'TopDom_n'),
                        output_matrix=topdom_prefix)

                    r_script = "identify_TADs_TopDom." + label + ".R"

                    # Shell step 1: write the R code into the script file.
                    write_script_cmd = """echo \"{FileContent}\" > {fileName}""".format(
                        FileContent=r_code, fileName=r_script)

                    # Shell step 2: create the output directory and run CreateTopDomMat.sh on the input matrix.
                    build_matrix_cmd = """mkdir -p {sample_output_dir} && {script} {input} {res}""".format(
                        sample_output_dir=tad_dir,
                        script="CreateTopDomMat.sh",
                        input=matrix_in,
                        res=resolution)

                    prepare_job = Job(
                        input_files=[matrix_in],
                        output_files=[matrix_tmp],
                        module_entries=[
                            ["identify_TADs", "module_R"],
                            ["identify_TADs", "module_mugqic_tools"]
                        ],
                        name="identify_TADs.create_input." + label,
                        command=write_script_cmd + " && " + build_matrix_cmd,
                        removable_files=[matrix_tmp])

                    # Runs the generated R script, then deletes it.
                    call_job = Job(
                        input_files=[matrix_tmp],
                        output_files=[
                            topdom_prefix + suffix
                            for suffix in (".bed", ".binSignal", ".domain")
                        ],
                        module_entries=[
                            ["identify_TADs", "module_R"],
                            ["identify_TADs", "module_mugqic_tools"]
                        ],
                        name="identify_TADs.call_TADs." + label,
                        command="Rscript {fileName} && rm {fileName}".format(fileName=r_script),
                        removable_files=[matrix_tmp])

                    all_jobs.append(prepare_job)
                    all_jobs.append(call_job)

        return all_jobs

예제 #5

0

파일 보기

파일: hicseq.py 프로젝트: marquispa/test

    def interaction_matrices_Chr(self):
        """
        IntraChromosomal interaction matrices are produced by Homer at resolutions defined in the ini config file and plotted by HiCPlotter.
        For more detailed information about the HOMER matrices visit: [HOMER matrices] (http://homer.ucsd.edu/homer/interactions/HiCmatrices.html)
        For more detailed information about HiCPlotter visit: [HiCPlotter] (https://github.com/kcakdemir/HiCPlotter)

        The chromosomes, resolutions and the "hmc" plotting option come from the [interaction_matrices_Chr]
        section of the config. When "chromosomes" is "All", the list is obtained from
        genome.chr_names_conv() applied to the DEFAULT "genome_dictionary" file.

        Returns the list of jobs: for every sample, resolution and chromosome, one Homer matrix job
        followed by one HiCPlotter job that also renames the produced plot.
        """

        jobs = []

        chrs = config.param('interaction_matrices_Chr', 'chromosomes')
        res_chr = config.param('interaction_matrices_Chr',
                               'resolution_chr').split(",")

        if chrs == "All":
            genome_dict = os.path.expandvars(
                config.param('DEFAULT', 'genome_dictionary', type='filepath'))
            chrs = genome.chr_names_conv(genome_dict)
        else:
            chrs = chrs.split(",")

        for sample in self.samples:
            tagDirName = "_".join(("HTD", sample.name, self.enzyme))
            homer_output_dir = os.path.join(
                self.output_dirs['homer_output_directory'], tagDirName)
            sample_output_dir_chr = os.path.join(
                self.output_dirs['matrices_output_directory'], sample.name,
                "chromosomeMatrices")

            # loop over chrs and res:
            for res in res_chr:
                # Resolution expressed in kb for the plot file names. Floor division keeps the value
                # an integer ("50", not "50.0") on Python 3, exactly as Python 2's integer "/" did.
                res_kb = str(int(res) // 1000)

                for chrom in chrs:

                    fileName = os.path.join(
                        sample_output_dir_chr, "_".join(
                            (tagDirName, chrom, res, "raw.txt")))
                    fileNameRN = os.path.join(
                        sample_output_dir_chr, "_".join(
                            (tagDirName, chrom, res, "rawRN.txt")))

                    jobMatrix = homer.hic_interactionMatrix_chr(
                        sample.name, sample_output_dir_chr, homer_output_dir,
                        res, chrom, fileName, fileNameRN)

                    # Source of the "mv" below: a shell-quoted name with a wildcard for the bin range.
                    fileNamePlot = os.path.join(
                        sample_output_dir_chr,
                        "".join((tagDirName, "_", chrom, "_", res, "_raw-",
                                 chrom, "\'.ofBins(0-\'*\')\'.",
                                 res_kb, "K.jpeg")))
                    # Fixed destination name, declared as the plotting job's output file.
                    newFileNamePlot = os.path.join(
                        sample_output_dir_chr, "".join(
                            (tagDirName, "_", chrom, "_", res, "_raw-", chrom,
                             ".all.", res_kb, "K.jpeg")))
                    commandChrPlot = "HiCPlotter.py -f {fileNameRN} -n {name} -chr {chr} -r {res} -fh 0 -o {sample_output_dir_chr} -ptr 0 -hmc {hmc} && mv {fileNamePlot} {newFileNamePlot}".format(
                        res=res,
                        chr=chrom,
                        fileNameRN=fileNameRN,
                        name=sample.name,
                        sample_output_dir_chr=os.path.join(
                            sample_output_dir_chr, "_".join(
                                (tagDirName, chrom, res, "raw"))),
                        hmc=config.param('interaction_matrices_Chr', 'hmc'),
                        fileNamePlot=fileNamePlot,
                        newFileNamePlot=newFileNamePlot)

                    jobPlot = Job(input_files=[fileNameRN],
                                  output_files=[newFileNamePlot],
                                  module_entries=[[
                                      "interaction_matrices_Chr",
                                      "module_HiCPlotter"
                                  ]],
                                  name="interaction_matrices_Chr.plotting." +
                                  sample.name + "_" + chrom + "_res" + res,
                                  command=commandChrPlot)

                    jobs.extend([jobMatrix, jobPlot])

        return jobs