def interaction_matrices_Chr(self):
    """
    IntraChromosomal interaction matrices are produced by Homer at resolutions defined in the ini config file and plotted by HiCPlotter.
    For more detailed information about the HOMER matrices visit: [HOMER matrices] (http://homer.ucsd.edu/homer/interactions/HiCmatrices.html)
    For more detailed information about HiCPlotter visit: [HiCPlotter] (https://github.com/kcakdemir/HiCPlotter)

    Chromosomes and resolutions are read from the [interaction_matrices_Chr]
    section of the ini file; a chromosomes value of "All" is expanded to the
    names obtained from the genome dictionary.

    Returns:
        list: for every sample, resolution and chromosome, the Homer matrix
        job followed by its HiCPlotter plotting job.
    """
    jobs = []

    chrs = config.param('interaction_matrices_Chr', 'chromosomes')
    res_chr = config.param('interaction_matrices_Chr', 'resolution_chr').split(",")

    if chrs == "All":
        genome_dict = os.path.expandvars(config.param('DEFAULT', 'genome_dictionary', type='filepath'))
        chrs = genome.chr_names_conv(genome_dict)
    else:
        chrs = chrs.split(",")

    for sample in self.samples:
        tagDirName = "_".join(("HTD", sample.name, self.enzyme))
        homer_output_dir = os.path.join(self.output_dirs['homer_output_directory'], tagDirName)
        sample_output_dir_chr = os.path.join(self.output_dirs['matrices_output_directory'], sample.name, "chromosomeMatrices")

        # loop over chrs and res:
        for res in res_chr:
            # Floor division keeps the kilobase label integral ("50", not
            # "50.0") on Python 3 as well as Python 2; a float label would
            # change the plot file names built below.
            res_kb = str(int(res) // 1000)

            for chrom in chrs:
                fileName = os.path.join(sample_output_dir_chr, "_".join((tagDirName, chrom, res, "raw.txt")))
                fileNameRN = os.path.join(sample_output_dir_chr, "_".join((tagDirName, chrom, res, "rawRN.txt")))

                # Both plot names are handed to the hicplotter wrapper:
                # the first carries the quoted ".ofBins(0-*)" part, the
                # second the simplified ".all." form.
                fileNamePlot = os.path.join(
                    sample_output_dir_chr,
                    "".join((tagDirName, "_", chrom, "_", res, "_raw-", chrom, "\'.ofBins(0-\'*\')\'.", res_kb, "K.jpeg"))
                )
                newFileNamePlot = os.path.join(
                    sample_output_dir_chr,
                    "".join((tagDirName, "_", chrom, "_", res, "_raw-", chrom, ".all.", res_kb, "K.jpeg"))
                )

                jobMatrix = homer.hic_interactionMatrix_chr(
                    sample.name,
                    sample_output_dir_chr,
                    homer_output_dir,
                    res,
                    chrom,
                    fileName,
                    fileNameRN
                )
                jobMatrix.samples = [sample]

                jobPlot = hicplotter.intra_chrom_matrix_plot(
                    fileNameRN,
                    sample.name,
                    chrom,
                    res,
                    os.path.join(sample_output_dir_chr, "_".join((tagDirName, chrom, res, "raw"))),
                    fileNamePlot,
                    newFileNamePlot
                )
                jobPlot.samples = [sample]

                jobs.extend([jobMatrix, jobPlot])

    return jobs
def identify_TADs_TopDom(self):
    """
    Topological associating Domains (TADs) are identified using TopDom at resolutions defined in the ini config file.
    For more detailed information about the TopDom visit: [TopDom] (https://www.ncbi.nlm.nih.gov/pubmed/26704975)

    Chromosomes and resolutions come from the [identify_TADs] section of the
    ini file; a chromosomes value of "All" is expanded through the genome
    dictionary.

    Returns:
        list: for every sample, resolution and chromosome, an input-creation
        job followed by the TAD-calling job.
    """
    jobs = []

    chromosomes = config.param('identify_TADs', 'chromosomes')
    resolutions = config.param('identify_TADs', 'resolution_TADs').split(",")

    if chromosomes != "All":
        chromosomes = chromosomes.split(",")
    else:
        dictionary_path = os.path.expandvars(config.param('DEFAULT', 'genome_dictionary', type='filepath'))
        chromosomes = genome.chr_names_conv(dictionary_path)

    for sample in self.samples:
        topdom_dir = os.path.join(self.output_dirs['TAD_output_directory'], sample.name, "TopDom")

        for res in resolutions:
            for chrom in chromosomes:
                # Shared stem of the per-chromosome matrix and TopDom file names.
                matrix_stem = "_".join(("HTD", sample.name, self.enzyme, chrom, res))
                # Shared suffix of the job names and of the R script name.
                job_tag = sample.name + "_" + chrom + "_res" + res

                input_matrix = os.path.join(
                    self.output_dirs['matrices_output_directory'],
                    sample.name,
                    "chromosomeMatrices",
                    matrix_stem + "_rawRN.txt"
                )
                mat_a = input_matrix + ".MatA"
                topdom_prefix = os.path.join(topdom_dir, matrix_stem + "_rawRN.MatA.TopDom")
                r_script = "identify_TADs_TopDom." + job_tag + ".R"

                # First job: create the output directory, then build the TopDom input.
                make_dir = Job(command="mkdir -p " + topdom_dir)
                make_input = topdom.create_input(input_matrix, mat_a, topdom_prefix, r_script, res)
                prepare_job = concat_jobs(
                    [make_dir, make_input],
                    name="identify_TADs.TopDom_create_input." + job_tag,
                    samples=[sample]
                )

                # Second job: call TADs on the prepared matrix.
                call_job = topdom.call_TADs(mat_a, topdom_prefix, r_script)
                call_job.name = "identify_TADs.TopDom_call_TADs." + job_tag
                call_job.samples = [sample]

                jobs.append(prepare_job)
                jobs.append(call_job)

    return jobs
def identify_TADs_RobusTAD(self):
    """
    Topological associating Domain (TAD) scores are calculated using RobusTAD for every bin in the genome.
    RobusTAD is resolution-independent and will use the first resolution in "resolution_TADs" under [identify_TADs] in the ini file.
    For more detailed information about the RobusTAD visit: [RobusTAD] (https://github.com/rdali/RobusTAD)

    Chromosomes come from the [identify_TADs] section of the ini file; a
    value of "All" is expanded through the genome dictionary.

    Returns:
        list: one job per sample and chromosome, each creating the sample's
        RobusTAD output directory and then running RobusTAD on that
        chromosome's matrix.
    """
    jobs = []

    chrs = config.param('identify_TADs', 'chromosomes')
    # Only the first configured resolution is used.
    res = config.param('identify_TADs', 'resolution_TADs').split(",")[0]

    if chrs == "All":
        genome_dict = os.path.expandvars(config.param('DEFAULT', 'genome_dictionary', type='filepath'))
        chrs = genome.chr_names_conv(genome_dict)
    else:
        chrs = chrs.split(",")

    for sample in self.samples:
        sample_output_dir = os.path.join(self.output_dirs['TAD_output_directory'], sample.name, "RobusTAD")

        for chrom in chrs:
            input_matrix = os.path.join(
                self.output_dirs['matrices_output_directory'],
                sample.name,
                "chromosomeMatrices",
                "_".join(("HTD", sample.name, self.enzyme, chrom, res, "rawRN.txt"))
            )

            # The expected output file names used to be rebuilt here but were
            # never passed on; only the input matrix, the output directory and
            # the resolution are handed to the robustad wrapper.
            job = concat_jobs([
                Job(command="mkdir -p " + sample_output_dir),
                robustad.call_TADs(input_matrix, sample_output_dir, res)
            ])
            job.name = "identify_TADs.RobusTAD." + sample.name + "_" + chrom
            job.samples = [sample]

            jobs.append(job)

    return jobs
def identify_TADs(self):
    """
    Topological associating Domains (TADs) are identified using TopDom at resolutions defined in the ini config file.
    For more detailed information about the TopDom visit: [TopDom] (https://www.ncbi.nlm.nih.gov/pubmed/26704975)

    Chromosomes and resolutions come from the [identify_TADs] section of the
    ini file; a chromosomes value of "All" is expanded through the genome
    dictionary.

    Returns:
        list: for every sample, resolution and chromosome, an input-creation
        job followed by the TAD-calling job.
    """
    jobs = []

    chromosomes = config.param('identify_TADs', 'chromosomes')
    resolutions = config.param('identify_TADs', 'resolution_TADs').split(",")

    if chromosomes != "All":
        chromosomes = chromosomes.split(",")
    else:
        dictionary_path = os.path.expandvars(
            config.param('DEFAULT', 'genome_dictionary', type='filepath'))
        chromosomes = genome.chr_names_conv(dictionary_path)

    for sample in self.samples:
        tad_dir = os.path.join(self.output_dirs['TAD_output_directory'], sample.name)

        for res in resolutions:
            for chrom in chromosomes:
                # Shared stem of the matrix / TopDom file names and shared
                # suffix of the job and script names.
                matrix_stem = "_".join(("HTD", sample.name, self.enzyme, chrom, res))
                job_tag = sample.name + "_" + chrom + "_res" + res

                input_matrix = os.path.join(
                    self.output_dirs['matrices_output_directory'],
                    sample.name,
                    "chromosomeMatrices",
                    matrix_stem + "_rawRN.txt")
                mat_a = input_matrix + ".MatA"
                topdom_prefix = os.path.join(tad_dir, matrix_stem + "_rawRN.MatA.TopDom")

                ## make TopDom R script:
                # The quotes are backslash-escaped because the text is later
                # embedded inside a double-quoted shell `echo`.
                r_code = """source(\\\"{script}\\\"); TopDom(matrix.file=\'{tmp_matrix}\', window.size={n}, outFile=\'{output_matrix}\')""".format(
                    script=os.path.expandvars("${R_TOOLS}/TopDom_v0.0.2.R"),
                    tmp_matrix=mat_a,
                    n=config.param('identify_TADs', 'TopDom_n'),
                    output_matrix=topdom_prefix)
                r_script = "identify_TADs_TopDom." + job_tag + ".R"

                write_script_cmd = """echo \"{FileContent}\" > {fileName}""".format(
                    FileContent=r_code,
                    fileName=r_script)
                build_matrix_cmd = """mkdir -p {sample_output_dir} && {script} {input} {res}""".format(
                    sample_output_dir=tad_dir,
                    script="CreateTopDomMat.sh",
                    input=input_matrix,
                    res=res)

                # Job 1: write the R script and build the .MatA input matrix.
                prepare_job = Job(
                    input_files=[input_matrix],
                    output_files=[mat_a],
                    module_entries=[
                        ["identify_TADs", "module_R"],
                        ["identify_TADs", "module_mugqic_tools"]
                    ],
                    name="identify_TADs.create_input." + job_tag,
                    command=write_script_cmd + " && " + build_matrix_cmd,
                    removable_files=[mat_a])

                # Job 2: run the generated R script, then delete it.
                call_job = Job(
                    input_files=[mat_a],
                    output_files=[topdom_prefix + ext for ext in (".bed", ".binSignal", ".domain")],
                    module_entries=[
                        ["identify_TADs", "module_R"],
                        ["identify_TADs", "module_mugqic_tools"]
                    ],
                    name="identify_TADs.call_TADs." + job_tag,
                    command="Rscript {fileName} && rm {fileName}".format(fileName=r_script),
                    removable_files=[mat_a])

                jobs.append(prepare_job)
                jobs.append(call_job)

    return jobs
def interaction_matrices_Chr(self):
    """
    IntraChromosomal interaction matrices are produced by Homer at resolutions defined in the ini config file and plotted by HiCPlotter.
    For more detailed information about the HOMER matrices visit: [HOMER matrices] (http://homer.ucsd.edu/homer/interactions/HiCmatrices.html)
    For more detailed information about HiCPlotter visit: [HiCPlotter] (https://github.com/kcakdemir/HiCPlotter)

    Chromosomes, resolutions and the `hmc` plotting option are read from the
    [interaction_matrices_Chr] section of the ini file; a chromosomes value
    of "All" is expanded to the names obtained from the genome dictionary.

    Returns:
        list: for every sample, resolution and chromosome, the Homer matrix
        job followed by its HiCPlotter plotting job.
    """
    jobs = []

    chrs = config.param('interaction_matrices_Chr', 'chromosomes')
    res_chr = config.param('interaction_matrices_Chr', 'resolution_chr').split(",")

    if chrs == "All":
        genome_dict = os.path.expandvars(
            config.param('DEFAULT', 'genome_dictionary', type='filepath'))
        chrs = genome.chr_names_conv(genome_dict)
    else:
        chrs = chrs.split(",")

    for sample in self.samples:
        tagDirName = "_".join(("HTD", sample.name, self.enzyme))
        homer_output_dir = os.path.join(
            self.output_dirs['homer_output_directory'], tagDirName)
        sample_output_dir_chr = os.path.join(
            self.output_dirs['matrices_output_directory'], sample.name,
            "chromosomeMatrices")

        # loop over chrs and res:
        for res in res_chr:
            # Floor division keeps the kilobase label integral ("50", not
            # "50.0") on Python 3 as well as Python 2; a float label would
            # make the `mv` source name below differ from an integer-labelled
            # plot file.
            res_kb = str(int(res) // 1000)

            for chrom in chrs:
                fileName = os.path.join(
                    sample_output_dir_chr,
                    "_".join((tagDirName, chrom, res, "raw.txt")))
                fileNameRN = os.path.join(
                    sample_output_dir_chr,
                    "_".join((tagDirName, chrom, res, "rawRN.txt")))

                jobMatrix = homer.hic_interactionMatrix_chr(
                    sample.name, sample_output_dir_chr, homer_output_dir,
                    res, chrom, fileName, fileNameRN)

                # The plot is moved from the quoted ".ofBins(0-*)" name to the
                # simplified ".all." name by the `mv` in the command below.
                fileNamePlot = os.path.join(
                    sample_output_dir_chr,
                    "".join((tagDirName, "_", chrom, "_", res, "_raw-", chrom,
                             "\'.ofBins(0-\'*\')\'.", res_kb, "K.jpeg")))
                newFileNamePlot = os.path.join(
                    sample_output_dir_chr,
                    "".join((tagDirName, "_", chrom, "_", res, "_raw-", chrom,
                             ".all.", res_kb, "K.jpeg")))

                commandChrPlot = "HiCPlotter.py -f {fileNameRN} -n {name} -chr {chr} -r {res} -fh 0 -o {sample_output_dir_chr} -ptr 0 -hmc {hmc} && mv {fileNamePlot} {newFileNamePlot}".format(
                    res=res,
                    chr=chrom,
                    fileNameRN=fileNameRN,
                    name=sample.name,
                    # -o receives an output prefix, not just the directory.
                    sample_output_dir_chr=os.path.join(
                        sample_output_dir_chr,
                        "_".join((tagDirName, chrom, res, "raw"))),
                    hmc=config.param('interaction_matrices_Chr', 'hmc'),
                    fileNamePlot=fileNamePlot,
                    newFileNamePlot=newFileNamePlot)

                jobPlot = Job(
                    input_files=[fileNameRN],
                    output_files=[newFileNamePlot],
                    module_entries=[[
                        "interaction_matrices_Chr", "module_HiCPlotter"
                    ]],
                    name="interaction_matrices_Chr.plotting." + sample.name + "_" + chrom + "_res" + res,
                    command=commandChrPlot)

                jobs.extend([jobMatrix, jobPlot])

    return jobs