def bait_intersect(self):
    """
    provided with a bed file, for example a bed of GWAS snps or features of interest, this method returns the lines in the bed file that intersect with the baits that have significant interactions.
    Input bed must have 4 columns (<chr> <start> <end> <annotation>) and must be tab separated.

    The features file is taken from the 'features_file' option of the
    'bait_intersect' configuration section. When that option is the literal
    string "None" the step is disabled and no job is created.

    Returns:
        A list of jobs: one job that creates the output directory and sorts
        the features file, followed by one job per sample that extracts the
        bait regions from the sample's ibed file, sorts them, intersects them
        with the sorted features and writes
        <features>_<sample>.bait_intersect.bed in the intersect output
        directory.
    """
    jobs = []

    chicago_output_dir = self.output_dirs['chicago_output_directory']
    intersect_output_dir = self.output_dirs['intersect_ouput_directory']
    # NOTE(review): read from the configuration but not passed to any command below.
    other_options = config.param('bait_intersect', 'other_options', required=False)
    features_file = config.param('bait_intersect', 'features_file', required=True)

    # "None" is the configuration sentinel for "no features file": every job
    # below depends on the sorted features file, so nothing is scheduled.
    if features_file == "None":
        return jobs

    sorted_features_file = os.path.splitext(features_file)[0] + ".sorted.bed"
    output_dir = os.path.join(chicago_output_dir, intersect_output_dir)
    # Same for every sample, so computed once outside the loop.
    features_label = os.path.splitext(os.path.basename(features_file))[0]

    sort_features_job = concat_jobs([
        Job(command="mkdir -p {output_dir}".format(output_dir=output_dir)),
        bedops.sort_bed(features_file, sorted_features_file)
    ])
    sort_features_job.name = "bait_intersect.sort_feature." + os.path.basename(sorted_features_file)
    sort_features_job.removable_files = [sorted_features_file]
    sort_features_job.samples = self.samples
    jobs.append(sort_features_job)

    for sample in self.samples:
        sample_output_dir = os.path.join(chicago_output_dir, sample.name, "data")
        ibed_file = os.path.join(sample_output_dir, sample.name + ".ibed")
        # presumably the file written by tools.sh_extract_bait_bed; confirm against that helper
        bait_bed_file = ibed_file + ".bait"
        sorted_ibed_file = re.sub(r"\.ibed$", ".bait.sorted.bed", ibed_file)
        output_file_prefix = os.path.join(
            output_dir,
            features_label + "_" + os.path.splitext(os.path.basename(ibed_file))[0]
        )
        intersect_tmp_file = output_file_prefix + ".tmp"

        sample_job = concat_jobs([
            tools.sh_extract_bait_bed(ibed_file, sample.name),
            bedops.sort_bed(bait_bed_file, sorted_ibed_file),
            bedtools.intersect_beds(sorted_features_file, sorted_ibed_file, intersect_tmp_file, "-wa -u"),
            bedops.bedmap_echoMapId(intersect_tmp_file, sorted_ibed_file, output_file_prefix + ".bait_intersect.bed")
        ])
        sample_job.name = "bait_intersect." + sample.name
        sample_job.removable_files = [sorted_ibed_file, sorted_features_file, intersect_tmp_file, bait_bed_file]
        sample_job.samples = [sample]
        jobs.append(sample_job)

    return jobs
def create_rmap_file(self):
    """
    rmap file for Chicago capture analysis is created using the hicup digestion file.

    The digest file (self.genome_digest) is reduced to its first three
    columns; the first two lines are skipped, rows whose second and third
    columns are equal are dropped, and the line number is appended as a
    fourth column. The result (<enzyme>.Initialrmap) is then sorted into
    <enzyme>.sorted.rmap, both in the 'chicago_input_files' directory.

    NOTE(review): this source contains a second definition of
    create_rmap_file further down; if both live in the same class the later
    one replaces this one -- confirm which is intended.

    Returns:
        A single-element list holding the combined create-and-sort job.
    """
    ## return 1 rmap per enzyme
    input_file = self.genome_digest
    rmap_dir = self.output_dirs['chicago_input_files']
    output = os.path.join(rmap_dir, self.enzyme + ".Initialrmap")
    sorted_output = re.sub(r"\.Initialrmap", ".sorted.rmap", output)

    # NR>2 skips the first two lines of the digest file; NR is reused as the row id.
    command = """mkdir -p {dir} && \\
cut -f 1-3 {input_file} > {output}.tmp && \\
awk 'BEGIN {{FS=\"\\t\"; OFS=\"\\t\"}} NR>2 {{if ($2 != $3) print $0, NR}}' {output}.tmp > {output} && \\
rm {output}.tmp""".format(dir=rmap_dir, input_file=input_file, output=output)

    job_rmap = Job(
        input_files=[input_file],
        output_files=[output, sorted_output],
        command=command,
        name="create_rmap_file." + self.enzyme
    )
    job_sort = bedops.sort_bed(output, sorted_output)

    job = concat_jobs([job_rmap, job_sort])
    job.name = "create_rmap_file." + os.path.basename(input_file)
    return [job]
def create_baitmap_file(self):
    """
    baitmap file for Chicago capture analysis is created using the created rmap file and the probe capture bed file.

    The capture bed file ('baitBed' option of the 'create_baitmap_file'
    configuration section) is sorted, the sorted rmap fragments overlapping
    it are kept, and each kept fragment is given an annotation column. The
    result is written to <baitBed name>_<enzyme>.baitmap in the
    'chicago_input_files' directory.

    NOTE(review): this source contains a second definition of
    create_baitmap_file further down; if both live in the same class the
    later one replaces this one -- confirm which is intended.

    Returns:
        A single-element list holding the combined sort, intersect and
        annotate job.
    """
    ## return 1 baitmap per enzyme/capture array combination
    input_rmap = os.path.join(self.output_dirs['chicago_input_files'], self.enzyme + ".sorted.rmap")
    input_bait = config.param('create_baitmap_file', "baitBed")
    sorted_input_bait = re.sub(r"\.bed", ".sorted.bed", input_bait)
    output_file_name = re.sub(r"\.bed", "", os.path.basename(input_bait)) + "_" + self.enzyme + ".baitmap"
    output_file = os.path.join(self.output_dirs['chicago_input_files'], output_file_name)
    annotation = config.param('create_baitmap_file', "annotation")

    intersect_tmp = output_file + ".tmp"

    job_sort = bedops.sort_bed(input_bait, sorted_input_bait)
    job_intersectBeds = bedtools.intersect_beds(input_rmap, sorted_input_bait, intersect_tmp, "-wa -u")

    ## When the first line of baitBed has 4 columns, annotate each fragment with the
    ## ids bedmap maps from baitBed; otherwise label each fragment with the
    ## configured annotation prefix followed by its row number.
    job_anno = Job(
        input_files=[intersect_tmp],
        output_files=[output_file],
        command="""column_num=$(awk 'NR <2 {{print NF}}' {input_bait})
if [[ \"$column_num\" -eq 4 ]]; then
    bedmap --echo --echo-map-id {outputTmp} {sorted_input_bait} > {outputTmp2}
    tr '|' '\\t' < {outputTmp2} > {output_file};
    rm {outputTmp2};
else
    awk 'BEGIN {{FS=\"\\t\"; OFS=\"\\t\"}}{{print $0, \"{annotation}\"NR}}' {outputTmp} > {output_file}
fi""".format(
            input_bait=input_bait,
            outputTmp=intersect_tmp,
            outputTmp2=output_file + ".tmp2",
            sorted_input_bait=sorted_input_bait,
            annotation=annotation,
            output_file=output_file
        ),
        removable_files=[intersect_tmp],
        name="create_baitmap_file.addAnno." + output_file_name
    )

    job = concat_jobs([job_sort, job_intersectBeds, job_anno])
    job.name = "create_baitmap_file." + output_file_name
    return [job]
def create_rmap_file(self):
    """
    rmap file for Chicago capture analysis is created using the hicup digestion file.

    Creates the 'chicago_input_files' directory, builds
    <enzyme>.Initialrmap from self.genome_digest through
    tools.sh_create_rmap, and sorts it into <enzyme>.sorted.rmap.

    NOTE(review): an earlier definition of create_rmap_file also appears in
    this source; if both live in the same class this later one is the one
    in effect -- confirm the earlier one can be removed.

    Returns:
        A single-element list holding the combined job, tagged with all
        samples of the run.
    """
    ## return 1 rmap per enzyme
    input_file = self.genome_digest
    rmap_dir = self.output_dirs['chicago_input_files']
    output = os.path.join(rmap_dir, self.enzyme + ".Initialrmap")
    sorted_output = re.sub(r"\.Initialrmap", ".sorted.rmap", output)

    job = concat_jobs([
        Job(command="mkdir -p " + rmap_dir),
        tools.sh_create_rmap(input_file, output, "create_rmap_file." + self.enzyme),
        bedops.sort_bed(output, sorted_output)
    ])
    job.name = "create_rmap_file." + os.path.basename(input_file)
    job.samples = self.samples

    return [job]
def create_baitmap_file(self):
    """
    baitmap file for Chicago capture analysis is created using the created rmap file and the probe capture bed file.

    The capture bed file ('baitBed' option of the 'create_baitmap_file'
    configuration section) is sorted, the sorted rmap fragments overlapping
    it are written to <output>.tmp, and tools.sh_create_baitmap produces
    <baitBed name>_<enzyme>.baitmap in the 'chicago_input_files' directory.

    NOTE(review): an earlier definition of create_baitmap_file also appears
    in this source; if both live in the same class this later one is the
    one in effect -- confirm the earlier one can be removed.

    Returns:
        A single-element list holding the combined job, tagged with all
        samples of the run.
    """
    ## return 1 baitmap per enzyme/capture array combination
    chicago_input_dir = self.output_dirs['chicago_input_files']
    input_rmap = os.path.join(chicago_input_dir, self.enzyme + ".sorted.rmap")
    input_bait = config.param('create_baitmap_file', "baitBed")
    sorted_input_bait = re.sub(r"\.bed", ".sorted.bed", input_bait)
    output_file_name = re.sub(r"\.bed", "", os.path.basename(input_bait)) + "_" + self.enzyme + ".baitmap"
    output_file = os.path.join(chicago_input_dir, output_file_name)
    annotation = config.param('create_baitmap_file', "annotation")

    job = concat_jobs([
        bedops.sort_bed(input_bait, sorted_input_bait),
        bedtools.intersect_beds(input_rmap, sorted_input_bait, output_file + ".tmp", "-wa -u"),
        # presumably picks up output_file + ".tmp" written by the intersect above; confirm in tools.sh_create_baitmap
        tools.sh_create_baitmap(input_bait, sorted_input_bait, annotation, output_file)
    ])
    job.name = "create_baitmap_file." + output_file_name
    job.samples = self.samples

    return [job]
def capture_intersect(self):
    """
    provided with a bed file, for example a bed of GWAS snps or features of interest, this method returns the lines in the bed file that intersect with the captured ends ("Other Ends") that have significant interactions.
    Input bed must have 4 columns (<chr> <start> <end> <annotation>) and must be tab separated.

    The features file is taken from the 'features_file' option of the
    'capture_intersect' configuration section. When that option is the
    literal string "None" the step is disabled and no job is created.

    Returns:
        A list of jobs: one job that creates the output directory and sorts
        the features file, followed by one job per sample that extracts the
        captured-end regions from the sample's ibed file, sorts them,
        intersects them with the sorted features and writes
        <features>_<sample>.capture_intersect.bed in the intersect output
        directory.
    """
    jobs = []

    chicago_output_dir = self.output_dirs['chicago_output_directory']
    intersect_output_dir = self.output_dirs['intersect_ouput_directory']
    # NOTE(review): read from the configuration but not passed to any command below.
    other_options = config.param('capture_intersect', 'other_options', required=False)
    features_file = config.param('capture_intersect', 'features_file', required=True)

    # "None" is the configuration sentinel for "no features file": every job
    # below depends on the sorted features file, so nothing is scheduled.
    if features_file == "None":
        return jobs

    sorted_features_file = os.path.splitext(features_file)[0] + ".sorted.bed"
    output_dir = os.path.join(chicago_output_dir, intersect_output_dir)
    # Same for every sample, so computed once outside the loop.
    features_label = os.path.splitext(os.path.basename(features_file))[0]

    sort_features_job = concat_jobs([
        Job(command="mkdir -p {output_dir}".format(output_dir=output_dir)),
        bedops.sort_bed(features_file, sorted_features_file)
    ])
    sort_features_job.name = "capture_intersect.sort_feature." + os.path.basename(sorted_features_file)
    sort_features_job.removable_files = [sorted_features_file]
    # Tag the job with the samples it covers.
    sort_features_job.samples = self.samples
    jobs.append(sort_features_job)

    for sample in self.samples:
        sample_output_dir = os.path.join(chicago_output_dir, sample.name, "data")
        ibed_file = os.path.join(sample_output_dir, sample.name + ".ibed")
        capture_bed_file = ibed_file + ".capture"
        sorted_ibed_file = re.sub(r"\.ibed$", ".capture.sorted.bed", ibed_file)
        output_file_prefix = os.path.join(
            output_dir,
            features_label + "_" + os.path.splitext(os.path.basename(ibed_file))[0]
        )
        intersect_tmp_file = output_file_prefix + ".tmp"

        # Columns 5-7 of the ibed file are emitted as a bed record (header line
        # skipped); the id is column 8, or "<col5>:<col6>-<col7>" when column 8
        # is ".". The second awk drops duplicate lines.
        job_extract_capture_bed = Job(
            input_files=[ibed_file],
            output_files=[capture_bed_file],
            name="extract_capture_bed." + sample.name,
            command="""awk 'BEGIN {{FS=\"\\t\"; OFS=\"\\t\"}} NR>1 {{if ($8 == ".") {{id = $5":"$6"-"$7}} else {{id = $8}} print $5,$6,$7,id}}' {input} > {outputTmp} && \\
awk '!a[$0]++' {outputTmp} > {output} && \\
rm {outputTmp}""".format(
                input=ibed_file,
                outputTmp=ibed_file + ".capture.tmp",
                output=capture_bed_file
            ),
            removable_files=[capture_bed_file]
        )

        sample_job = concat_jobs([
            job_extract_capture_bed,
            bedops.sort_bed(capture_bed_file, sorted_ibed_file),
            bedtools.intersect_beds(sorted_features_file, sorted_ibed_file, intersect_tmp_file, "-wa -u"),
            bedops.bedmap_echoMapId(intersect_tmp_file, sorted_ibed_file, output_file_prefix + ".capture_intersect.bed")
        ])
        sample_job.name = "capture_intersect." + sample.name
        sample_job.removable_files = [sorted_ibed_file, intersect_tmp_file, capture_bed_file]
        # Tag the job with the sample it covers.
        sample_job.samples = [sample]
        jobs.append(sample_job)

    return jobs