def compare_table(srna, tables, type_, wigs_f, wigs_r, texs, out, tsss,
                  args_srna):
    '''Write the table row of one sRNA when some condition supports it.

    Coverage is read from ``wigs_f`` for "+" strand entries and from
    ``wigs_r`` for everything else.  Only the ``type_`` values "inter"
    and "utr" produce a comparison result; with any other value the
    result is never bound and the lookup that follows fails.  Nothing
    is written when the result has no entries under "conds".  Commas
    in the overlap attributes are written as semicolons.
    '''
    tss_pro, end_pro = get_tss_pro(type_, srna)
    if type_ == "inter":
        covers = get_coverage(wigs_f if srna.strand == "+" else wigs_r, srna)
        cut = get_cutoff(srna, tsss, type_, tables, args_srna)
        frag_covers, tex_covers = devide_covers(covers)
        from_tex = replicate_comparison(
            args_srna, tex_covers, srna.strand, "normal", None, None, None,
            cut["notex"], cut["tex"], texs)
        from_frag = replicate_comparison(
            args_srna, frag_covers, srna.strand, "normal", None, None, None,
            None, cut["frag"], texs)
        srna_datas = merge_srna_datas(from_tex, from_frag)
    elif type_ == "utr":
        covers = get_coverage(wigs_f if srna.strand == "+" else wigs_r, srna)
        cut = get_cutoff(srna, tsss, type_, tables, args_srna)
        srna_datas = replicate_comparison(
            args_srna, covers, srna.strand, "sRNA_utr_derived",
            cut[srna.attributes["sRNA_type"]], cut,
            srna.attributes["sRNA_type"], None, cut, texs)

    conds = srna_datas["conds"]
    if len(conds) == 0:
        return

    summary = [
        srna.seq_id, srna.attributes["Name"], srna.start, srna.end,
        srna.strand, ";".join(conds.keys()), ";".join(conds.values()),
        tss_pro, end_pro, srna_datas["best"], srna_datas["high"],
        srna_datas["low"],
    ]
    out.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}"
              "\t{8}\t{9}\t{10}\t{11}\t".format(*summary))

    track_fmt = "{0}(avg={1};high={2};low={3})"
    if args_srna.table_best:
        # Only the best track is reported.
        out.write(track_fmt.format(
            srna_datas["track"], srna_datas["best"],
            srna_datas["high"], srna_datas["low"]))
    else:
        # Every track is reported; entries after the first are
        # separated by ";".
        for index, data in enumerate(srna_datas["detail"]):
            separator = "" if index == 0 else ";"
            out.write(separator + track_fmt.format(
                data["track"], data["avg"], data["high"], data["low"]))

    out.write("\t{0}\t{1}\n".format(
        srna.attributes["overlap_cds"].replace(",", ";"),
        srna.attributes["overlap_percent"].replace(",", ";")))
def compare_table(srna, tables, type_, wigs_f, wigs_r, texs, out, tsss,
                  args_srna):
    '''Write the table row of one sRNA when some condition supports it.

    Coverage is read from ``wigs_f`` for "+" strand entries and from
    ``wigs_r`` for everything else.  Only the ``type_`` values "inter"
    and "utr" produce a comparison result; with any other value the
    result is never bound and the lookup that follows fails.  Nothing
    is written when the result has no entries under "conds".  "&" in
    the overlap attributes is written as a semicolon.
    '''
    tss_pro, end_pro = get_tss_pro(type_, srna)
    if type_ == "inter":
        covers = get_coverage(wigs_f if srna.strand == "+" else wigs_r, srna)
        cut = get_cutoff(srna, tsss, type_, tables, args_srna)
        frag_covers, tex_covers = devide_covers(covers)
        from_tex = replicate_comparison(
            args_srna, tex_covers, srna.strand, "normal", None, None, None,
            cut["notex"], cut["tex"], texs)
        from_frag = replicate_comparison(
            args_srna, frag_covers, srna.strand, "normal", None, None, None,
            None, cut["frag"], texs)
        srna_datas = merge_srna_datas(from_tex, from_frag)
    elif type_ == "utr":
        covers = get_coverage(wigs_f if srna.strand == "+" else wigs_r, srna)
        cut = get_cutoff(srna, tsss, type_, tables, args_srna)
        srna_datas = replicate_comparison(
            args_srna, covers, srna.strand, "sRNA_utr_derived",
            cut[srna.attributes["sRNA_type"]], cut,
            srna.attributes["sRNA_type"], None, cut, texs)

    conds = srna_datas["conds"]
    if len(conds) == 0:
        return

    summary = [
        srna.seq_id, srna.attributes["Name"], srna.start, srna.end,
        srna.strand, ";".join(conds.keys()), ";".join(conds.values()),
        tss_pro, end_pro, srna_datas["best"], srna_datas["high"],
        srna_datas["low"],
    ]
    out.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}"
              "\t{8}\t{9}\t{10}\t{11}\t".format(*summary))

    track_fmt = "{0}(avg={1};high={2};low={3})"
    if args_srna.table_best:
        # Only the best track is reported.
        out.write(track_fmt.format(
            srna_datas["track"], srna_datas["best"],
            srna_datas["high"], srna_datas["low"]))
    else:
        # Every track is reported; entries after the first are
        # separated by ";".
        for index, data in enumerate(srna_datas["detail"]):
            separator = "" if index == 0 else ";"
            out.write(separator + track_fmt.format(
                data["track"], data["avg"], data["high"], data["low"]))

    out.write("\t{0}\t{1}\n".format(
        srna.attributes["overlap_cds"].replace("&", ";"),
        srna.attributes["overlap_percent"].replace("&", ";")))
def check_pro(ta, start, end, srna_datas, type_, cutoff, wigs, notex,
              args_srna):
    '''Look for a processing site that can terminate the candidate.

    Scans ``args_srna.pros`` for sites on the same sequence and strand
    as ``ta`` that lie inside the transcript and leave a fragment whose
    length is between ``args_srna.min_len`` and ``args_srna.max_len``.
    On the "+" strand the last qualifying site before the scan stops is
    kept; on the "-" strand the scan stops at the first qualifying one.
    The scan stops as soon as a site starts beyond ``ta.end``
    (presumably the sites are sorted by position -- confirm with the
    caller).

    When a usable site exists, the coverage between the candidate
    border and the site is re-evaluated; the result is discarded if its
    "best" value does not exceed ``cutoff``.

    Returns a tuple ``(pro_pos, new_srna_datas, detect_pro)`` where
    ``pro_pos`` is -1 and ``detect_pro`` is "NA" when no site was found
    and ``new_srna_datas`` is None when nothing was re-evaluated or the
    re-evaluation failed the cutoff.
    '''
    pro_pos = -1
    detect_pro = "NA"
    for pro in args_srna.pros:
        if (pro.seq_id != ta.seq_id) or (pro.strand != ta.strand):
            continue
        if ta.strand == "+":
            if (pro.start >= ta.start) and (pro.start <= ta.end) and (
                    (pro.start - start) >= args_srna.min_len) and (
                    (pro.start - start) <= args_srna.max_len):
                pro_pos = pro.start
                detect_pro = "".join(
                    ["Cleavage:", str(pro.start), "_", pro.strand])
            if pro.start > ta.end:
                break
        elif ta.strand == "-":
            if (pro.start >= ta.start) and (pro.start <= ta.end) and (
                    (end - pro.start) >= args_srna.min_len) and (
                    (end - pro.start) <= args_srna.max_len):
                pro_pos = pro.start
                detect_pro = "".join(
                    ["Cleavage:", str(pro.start), "_", pro.strand])
                break
            if pro.start > ta.end:
                break

    new_srna_datas = None
    if ta.strand == "+":
        if ((type_ == "within") and (srna_datas["pos"] < pro_pos)) or (
                (type_ == "longer") and (pro_pos != -1)):
            srna_covers = get_best(wigs, ta.seq_id, ta.strand, start,
                                   pro_pos, "total", args_srna, cutoff)
            new_srna_datas = replicate_comparison(
                args_srna, srna_covers, ta.strand, "normal", None, None,
                None, notex, cutoff, args_srna.texs)
            if new_srna_datas["best"] <= cutoff:
                new_srna_datas = None
    else:
        # Without the explicit ``pro_pos != -1`` test the "within" case
        # was true for every position when no site had been found
        # (any pos > -1), which requested coverage starting at -1.
        if ((type_ == "within") and (srna_datas["pos"] > pro_pos) and (
                pro_pos != -1)) or (
                (type_ == "longer") and (pro_pos != -1)):
            srna_covers = get_best(wigs, ta.seq_id, ta.strand, pro_pos,
                                   end, "total", args_srna, cutoff)
            new_srna_datas = replicate_comparison(
                args_srna, srna_covers, ta.strand, "normal", None, None,
                None, notex, cutoff, args_srna.texs)
            if new_srna_datas["best"] <= cutoff:
                new_srna_datas = None
    return pro_pos, new_srna_datas, detect_pro
def check_pro(ta, start, end, srna_datas, type_, cutoff, wigs, notex,
              args_srna):
    '''check the processing site for long non-coding RNA

    Sites from ``args_srna.pros`` are considered when they share the
    sequence and strand of ``ta``, lie inside the transcript and leave
    a fragment whose length is between ``args_srna.min_len`` and
    ``args_srna.max_len``.  The scan ends at the first site starting
    beyond ``ta.end`` and, on the "-" strand, at the first usable site.

    Returns ``(pro_pos, new_srna_datas, detect_pro)``; ``pro_pos`` is
    -1 and ``detect_pro`` is "NA" when no site qualified, and
    ``new_srna_datas`` is None unless the re-evaluated coverage has a
    "best" value above ``cutoff``.
    '''
    pro_pos = -1
    detect_pro = "NA"
    for pro in args_srna.pros:
        if (pro.seq_id != ta.seq_id) or (pro.strand != ta.strand):
            continue
        inside = (pro.start >= ta.start) and (pro.start <= ta.end)
        if ta.strand == "+":
            if inside and (args_srna.min_len <= (pro.start - start)
                           <= args_srna.max_len):
                pro_pos = pro.start
                detect_pro = "".join(["Cleavage:", str(pro.start),
                                      "_", pro.strand])
            if pro.start > ta.end:
                break
        elif ta.strand == "-":
            if inside and (args_srna.min_len <= (end - pro.start)
                           <= args_srna.max_len):
                pro_pos = pro.start
                detect_pro = "".join(["Cleavage:", str(pro.start),
                                      "_", pro.strand])
                break
            if pro.start > ta.end:
                break

    # Decide whether the region up to the site must be re-evaluated and
    # which coordinates delimit it.
    found = pro_pos != -1
    if ta.strand == "+":
        recheck = ((type_ == "within") and (srna_datas["pos"] < pro_pos)) or (
            (type_ == "longer") and found)
        left, right = start, pro_pos
    else:
        recheck = ((type_ == "within") and (srna_datas["pos"] > pro_pos)
                   and found) or ((type_ == "longer") and found)
        left, right = pro_pos, end

    new_srna_datas = None
    if recheck:
        srna_covers = get_best(wigs, ta.seq_id, ta.strand, left, right,
                               "total", args_srna, cutoff)
        new_srna_datas = replicate_comparison(
            args_srna, srna_covers, ta.strand, "normal", None, None, None,
            notex, cutoff, args_srna.texs)
        if new_srna_datas["best"] <= cutoff:
            new_srna_datas = None
    return pro_pos, new_srna_datas, detect_pro
def detect_wig_pos(wigs, ta, start, end, tss, cutoff, notex, args_srna):
    '''Report a candidate that ends where exchange_to_pro detects it.

    The "differential" coverage between ``start`` and ``end`` is
    compared across replicates and handed to ``exchange_to_pro``.  If
    that reports a detection, a tab-separated "sRNA" feature line is
    printed: from ``start`` to the detected position on the "+" strand,
    from the detected position to ``end`` otherwise.  A processing
    site other than "NA" is appended to ``tss`` with ";".
    '''
    srna_covers = get_best(wigs, ta.seq_id, ta.strand, start, end,
                           "differential", args_srna, cutoff)
    srna_datas = replicate_comparison(
        args_srna, srna_covers, ta.strand, "normal", None, None, None,
        notex, cutoff, args_srna.texs)
    detect, srna_datas, pro = exchange_to_pro(
        args_srna, srna_datas, ta, start, end, cutoff, wigs, notex)
    if not detect:
        return

    if ta.strand == "+":
        left, right = start, srna_datas["pos"]
    else:
        left, right = srna_datas["pos"], end
    columns = [ta.seq_id, "ANNOgesic", "sRNA", left, right,
               ".", ta.strand, "."]
    string = "\t".join(str(column) for column in columns)
    if pro != "NA":
        tss = ";".join([tss, pro])
    print_file(string, tss, srna_datas, ta.attributes["sRNA_type"],
               args_srna)
def detect_wig_pos(wigs, ta, start, end, tss, cutoff, notex, args_srna):
    '''searching the coverage decrease

    The "differential" coverage between ``start`` and ``end`` is
    compared across replicates and handed to ``exchange_to_pro``.  A
    tab-separated "ncRNA" feature line is printed only when that
    reports a detection and the resulting length lies between
    ``args_srna.min_len`` and ``args_srna.max_len``.  The feature runs
    from ``start`` to the detected position on the "+" strand and from
    the detected position to ``end`` otherwise.  A processing site
    other than "NA" is appended to ``tss`` with ";".
    '''
    srna_covers = get_best(wigs, ta.seq_id, ta.strand, start, end,
                           "differential", args_srna, cutoff)
    srna_datas = replicate_comparison(
        args_srna, srna_covers, ta.strand, "normal", None, None, None,
        notex, cutoff, args_srna.texs)
    detect, srna_datas, pro = exchange_to_pro(
        args_srna, srna_datas, ta, start, end, cutoff, wigs, notex)
    if not detect:
        return

    if ta.strand == "+":
        left, right = start, srna_datas["pos"]
    else:
        left, right = srna_datas["pos"], end
    if not (args_srna.min_len <= (right - left) <= args_srna.max_len):
        return

    columns = [ta.seq_id, "ANNOgesic", "ncRNA", left, right,
               ".", ta.strand, "."]
    string = "\t".join(str(column) for column in columns)
    if pro != "NA":
        tss = ";".join([tss, pro])
    print_file(string, tss, srna_datas, ta.attributes["sRNA_type"],
               args_srna, ta.seq_id)
def detect_srna(median, args_srna):
    '''check the sRNA candidates and print it out

    Candidates in ``args_srna.srnas`` whose "strain" is missing from
    ``median`` are skipped, as are those whose replicate comparison has
    a "best" value of 0.  For "5utr" and "interCDS" candidates the
    coordinates come from the comparison result, for "3utr" candidates
    from the candidate itself.  A candidate is printed, and the running
    number increased, only when its length lies between
    ``args_srna.min_len`` and ``args_srna.max_len``.
    '''
    if len(args_srna.srnas) == 0:
        return
    num = 0
    for srna in args_srna.srnas:
        if srna["strain"] not in median.keys():
            continue
        srna_datas = replicate_comparison(
            args_srna, srna["datas"], srna["strand"], "sRNA_utr_derived",
            median[srna["strain"]][srna["utr"]], args_srna.coverages,
            srna["utr"], None, None, args_srna.texs)
        if srna_datas["best"] == 0:
            continue
        if srna["utr"] in ("5utr", "interCDS"):
            start, end = srna_datas["start"], srna_datas["end"]
        elif srna["utr"] == "3utr":
            start, end = srna["start"], srna["end"]
        # NOTE(review): any other "utr" value reuses the coordinates of
        # the previous candidate (or fails on the first one) -- confirm
        # that no other value can occur.
        length = math.fabs(start - end)
        if args_srna.min_len <= length <= args_srna.max_len:
            print_file(num, srna, start, end, srna_datas, args_srna)
            num += 1
def get_coverage(start, end, strain, wigs, strand, ta, tss, cutoff_coverage,
                 notex, args_srna):
    '''Print the candidate between start and end if it has coverage.

    The "total" coverage of the region is compared across replicates;
    a tab-separated "ncRNA" feature line is printed, together with
    ``strain``, when the "best" value of that comparison is not 0.
    '''
    srna_covers = get_best(wigs, strain, strand, start, end, "total",
                           args_srna, cutoff_coverage)
    srna_datas = replicate_comparison(
        args_srna, srna_covers, strand, "normal", None, None, None,
        notex, cutoff_coverage, args_srna.texs)
    columns = [ta.seq_id, "ANNOgesic", "ncRNA", start, end,
               ".", ta.strand, "."]
    string = "\t".join(str(column) for column in columns)
    if srna_datas["best"] == 0:
        return
    print_file(string, tss, srna_datas, ta.attributes["sRNA_type"],
               args_srna, strain)
def get_coverage(start, end, strain, wigs, strand, ta, tss, cutoff_coverage,
                 notex, args_srna):
    '''Print the candidate between start and end if it has coverage.

    The "total" coverage of the region is compared across replicates;
    a tab-separated "ncRNA" feature line is printed when the "best"
    value of that comparison is not 0.
    '''
    srna_covers = get_best(wigs, strain, strand, start, end, "total",
                           args_srna, cutoff_coverage)
    srna_datas = replicate_comparison(
        args_srna, srna_covers, strand, "normal", None, None, None,
        notex, cutoff_coverage, args_srna.texs)
    columns = [ta.seq_id, "ANNOgesic", "ncRNA", start, end,
               ".", ta.strand, "."]
    string = "\t".join(str(column) for column in columns)
    if srna_datas["best"] == 0:
        return
    print_file(string, tss, srna_datas, ta.attributes["sRNA_type"],
               args_srna)
def coverage_and_output(sorfs, mediandict, wigs, out_g, out_t, file_type,
                        fasta, coverages, args_sorf, texs, run):
    '''get the coverage of sORF and print it out

    When ``run`` is "final" the GFF pragma and the table header are
    written first (the header gains a "Combinations" column if
    ``args_sorf.print_all`` is set) and every supported sORF is
    printed.  For any other ``run`` value nothing is printed; the
    supported sORFs are collected and returned as a list instead.

    With ``file_type`` "best" only sORFs accepted by
    ``compare_rbs_start`` are processed; with "all" every sORF is.
    '''
    if run == "final":
        out_g.write("##gff-version 3\n")
        columns = [
            "Genome", "Name", "Start", "End", "Strand", "Type", "TSS",
            "Ribosome_binding_site", "All_start_points", "All_stop_points",
            "sRNA_conflict", "Frame_shift", "Lib_type", "Best_avg_coverage",
            "Best_highest_coverage", "Best_lowest_coverage", "Track_detail",
            "Seq",
        ]
        if args_sorf.print_all:
            columns.append("Combinations")
        out_t.write("\t".join(columns) + "\n")

    num = 0
    final_sorfs = []
    for sorf in sorfs:
        rbs_ok = compare_rbs_start(sorf, args_sorf.min_rbs, args_sorf.max_rbs)
        if not ((rbs_ok and (file_type == "best")) or (file_type == "all")):
            continue
        if file_type == "best":
            check_start_end(sorf, args_sorf, fasta, run)
        detect_frame_shift(sorf)
        cutoffs = {}
        if sorf["strand"] == "+":
            wig_key, strand = "forward", "+"
        else:
            wig_key, strand = "reverse", "-"
        sorf_covers = get_coverage(sorf, wigs[wig_key], strand, coverages,
                                   mediandict, cutoffs)
        if len(sorf_covers) == 0:
            continue
        sorf_info = replicate_comparison(
            args_sorf, sorf_covers, sorf["strand"], "sORF", None, cutoffs,
            None, cutoffs, None, texs)
        if len(sorf_info["conds"].keys()) == 0:
            continue
        if run != "final":
            final_sorfs.append(sorf)
        else:
            print_file(sorf, sorf_info, num, out_g, out_t, file_type,
                       args_sorf)
            num += 1
    if run != "final":
        return final_sorfs
def coverage_and_output(sorfs, mediandict, wigs, out_g, out_t, file_type,
                        fasta, coverages, args_sorf, texs, run):
    '''get the coverage of sORF and print it out

    When ``run`` is "final" the GFF pragma and the table header are
    written first (the header gains a "Combinations" column if
    ``args_sorf.print_all`` is set) and every supported sORF is
    printed.  For any other ``run`` value nothing is printed; the
    supported sORFs are collected and returned as a list instead.

    With ``file_type`` "best" only sORFs accepted by
    ``compare_rbs_start`` are processed; with "all" every sORF is.
    ``args_sorf.background`` is forwarded to the coverage lookup.
    '''
    if run == "final":
        out_g.write("##gff-version 3\n")
        columns = [
            "Genome", "Name", "Start", "End", "Strand", "Type", "TSS",
            "Ribosome_binding_site", "All_start_points", "All_stop_points",
            "Conflict_sRNA", "Frame_shift", "Lib_type", "Best_avg_coverage",
            "Track_detail", "Seq",
        ]
        if args_sorf.print_all:
            columns.append("Combinations")
        out_t.write("\t".join(columns) + "\n")

    num = 0
    final_sorfs = []
    for sorf in sorfs:
        rbs_ok = compare_rbs_start(sorf, args_sorf.min_rbs, args_sorf.max_rbs)
        if not ((rbs_ok and (file_type == "best")) or (file_type == "all")):
            continue
        if file_type == "best":
            check_start_end(sorf, args_sorf, fasta, run)
        detect_frame_shift(sorf)
        cutoffs = {}
        if sorf["strand"] == "+":
            wig_key, strand = "forward", "+"
        else:
            wig_key, strand = "reverse", "-"
        sorf_covers = get_coverage(sorf, wigs[wig_key], strand, coverages,
                                   mediandict, cutoffs, args_sorf.background)
        if len(sorf_covers) == 0:
            continue
        sorf_info = replicate_comparison(
            args_sorf, sorf_covers, sorf["strand"], "sORF", None, cutoffs,
            None, cutoffs, None, texs)
        if len(sorf_info["conds"].keys()) == 0:
            continue
        if run != "final":
            final_sorfs.append(sorf)
        else:
            print_file(sorf, sorf_info, num, out_g, out_t, file_type,
                       args_sorf)
            num += 1
    if run != "final":
        return final_sorfs
def test_replicate_comparison(self):
    '''Check the summary replicate_comparison returns for a "3utr" call.

    ``check_tex`` of the module under test is replaced by the mock
    before the call; replicates are given as lists of "all_N" strings.
    '''
    cover_detect.check_tex = Mock_func().mock_check_tex
    template_texs = self.example.texs
    srna_covers = {"texnotex": self.example.cover_datas}
    coverages = {"3utr": 100, "5utr": 600}
    median = dict(
        track1_tex={"median": 100, "mean": 200},
        track1_notex={"median": 30, "mean": 80},
        track2_tex={"median": 150, "mean": 200},
        track2_notex={"median": 10, "mean": 20},
        frag={"median": 80, "mean": 100},
    )
    args = self.mock_args.mock()
    args.replicates = {"tex": ["all_2"], "frag": ["all_1"]}
    args.tex_notex = 2
    srna_datas = cover_detect.replicate_comparison(
        args, srna_covers, "+", "sRNA_utr_derived", median, coverages,
        "3utr", 100, 200, template_texs)
    # Checked in this order so the first mismatch is the one reported.
    expected = [
        ("best", 500),
        ("track", "frag"),
        ("high", 700),
        ("low", 400),
        ("start", 100),
        ("end", 202),
    ]
    for key, value in expected:
        self.assertEqual(srna_datas[key], value)
def coverage_and_output(sorfs, mediandict, wigs, out_g, out_t, file_type,
                        fasta, coverages, args_sorf, texs):
    '''get the coverage of sORF and print it out

    Writes the GFF pragma to ``out_g`` and the table header to
    ``out_t`` (with an extra "combinations" column when
    ``args_sorf.print_all`` is set), then prints every sORF whose
    replicate comparison reports at least one supporting condition.

    With ``file_type`` "best" only sORFs accepted by
    ``compare_rbs_start`` are processed; with "all" every sORF is.
    '''
    out_g.write("##gff-version 3\n")
    # Both header variants are built from one list so they cannot
    # drift apart; the print_all header used to spell the stop-point
    # column "all_stop_pointss".
    columns = [
        "strain", "Name", "start", "end", "strand", "type", "TSS", "RBS",
        "all_start_points", "all_stop_points", "sRNA_conflict",
        "frame_shift", "lib_type", "best_avg_coverage",
        "best_highest_coverage", "best_lowest_coverage", "track_detail",
        "seq",
    ]
    if args_sorf.print_all:
        columns.append("combinations")
    out_t.write("\t".join(columns) + "\n")

    num = 0
    for sorf in sorfs:
        # compare_rbs_start is evaluated for every sORF, also when
        # file_type is "all", exactly as before.
        if ((compare_rbs_start(sorf, args_sorf.min_rbs,
                               args_sorf.max_rbs)) and (
                file_type == "best")) or (
                file_type == "all"):
            if file_type == "best":
                check_start_end(sorf, args_sorf, fasta)
            detect_frame_shift(sorf)
            # Handed to get_coverage and then to replicate_comparison;
            # presumably filled by get_coverage -- confirm.
            cutoffs = {}
            if sorf["strand"] == "+":
                sorf_covers = get_coverage(sorf, wigs["forward"], "+",
                                           coverages, mediandict, cutoffs)
            else:
                sorf_covers = get_coverage(sorf, wigs["reverse"], "-",
                                           coverages, mediandict, cutoffs)
            if len(sorf_covers) != 0:
                sorf_info = replicate_comparison(
                    args_sorf, sorf_covers, sorf["strand"], "sORF", None,
                    cutoffs, None, cutoffs, None, texs)
                if len(sorf_info["conds"].keys()) != 0:
                    print_file(sorf, sorf_info, num, out_g, out_t,
                               file_type, args_sorf)
                    num += 1
def coverage_and_output(sorfs, mediandict, wigs, out_g, out_t, file_type,
                        fasta, coverages, args_sorf, texs):
    '''Compute the coverage of each sORF and print the supported ones.

    Writes the GFF pragma to ``out_g`` and the table header to
    ``out_t`` (with an extra "combinations" column when
    ``args_sorf.print_all`` is set), then prints every sORF whose
    replicate comparison reports at least one supporting condition.

    With ``file_type`` "best" only sORFs accepted by
    ``compare_rbs_start`` are processed; with "all" every sORF is.
    '''
    out_g.write("##gff-version 3\n")
    # Both header variants are built from one list so they cannot
    # drift apart; the print_all header used to spell the stop-point
    # column "all_stop_pointss".
    columns = [
        "strain", "Name", "start", "end", "strand", "type", "TSS", "RBS",
        "all_start_points", "all_stop_points", "sRNA_confliction",
        "frame_shift", "lib_type", "best_avg_coverage",
        "best_highest_coverage", "best_lowest_coverage", "track_detail",
        "seq",
    ]
    if args_sorf.print_all:
        columns.append("combinations")
    out_t.write("\t".join(columns) + "\n")

    num = 0
    for sorf in sorfs:
        # compare_rbs_start is evaluated for every sORF, also when
        # file_type is "all", exactly as before.
        if ((compare_rbs_start(sorf, args_sorf.min_rbs,
                               args_sorf.max_rbs)) and (
                file_type == "best")) or (
                file_type == "all"):
            if file_type == "best":
                check_start_end(sorf, args_sorf, fasta)
            detect_frame_shift(sorf)
            # Handed to get_coverage and then to replicate_comparison;
            # presumably filled by get_coverage -- confirm.
            cutoffs = {}
            if sorf["strand"] == "+":
                sorf_covers = get_coverage(sorf, wigs["forward"], "+",
                                           coverages, mediandict, cutoffs)
            else:
                sorf_covers = get_coverage(sorf, wigs["reverse"], "-",
                                           coverages, mediandict, cutoffs)
            if len(sorf_covers) != 0:
                sorf_info = replicate_comparison(
                    args_sorf, sorf_covers, sorf["strand"], "sORF", None,
                    cutoffs, None, cutoffs, None, texs)
                if len(sorf_info["conds"].keys()) != 0:
                    print_file(sorf, sorf_info, num, out_g, out_t,
                               file_type, args_sorf)
                    num += 1
def test_replicate_comparison(self):
    '''Check the summary replicate_comparison returns for a "3utr" call.

    ``check_tex`` of the module under test is replaced by the mock
    before the call; replicates are given as plain integers.
    '''
    cover_detect.check_tex = Mock_func().mock_check_tex
    template_texs = self.example.texs
    srna_covers = {"texnotex": self.example.cover_datas}
    coverages = {"3utr": 100, "5utr": 600}
    median = dict(
        track1_tex={"median": 100, "mean": 200},
        track1_notex={"median": 30, "mean": 80},
        track2_tex={"median": 150, "mean": 200},
        track2_notex={"median": 10, "mean": 20},
        frag={"median": 80, "mean": 100},
    )
    args = self.mock_args.mock()
    args.replicates = {"tex": 2, "frag": 1}
    args.tex_notex = 2
    srna_datas = cover_detect.replicate_comparison(
        args, srna_covers, "+", "sRNA_utr_derived", median, coverages,
        "3utr", 100, 200, template_texs)
    # Checked in this order so the first mismatch is the one reported.
    expected = [
        ("best", 500),
        ("track", "frag"),
        ("high", 700),
        ("low", 400),
        ("start", 100),
        ("end", 202),
    ]
    for key, value in expected:
        self.assertEqual(srna_datas[key], value)
def detect_srna(median, args_srna):
    '''Print the UTR-derived candidates that pass coverage and length.

    Candidates in ``args_srna.srnas`` whose "strain" is missing from
    ``median`` are skipped, as are those whose replicate comparison has
    a "best" value of 0.  For "5utr" and "interCDS" candidates the
    coordinates come from the comparison result, for "3utr" candidates
    from the candidate itself.  A candidate is printed, and the running
    number increased, only when its length lies between
    ``args_srna.min_len`` and ``args_srna.max_len``.
    '''
    if len(args_srna.srnas) == 0:
        return
    num = 0
    for srna in args_srna.srnas:
        if srna["strain"] not in median.keys():
            continue
        srna_datas = replicate_comparison(
            args_srna, srna["datas"], srna["strand"], "sRNA_utr_derived",
            median[srna["strain"]][srna["utr"]], args_srna.coverages,
            srna["utr"], None, None, args_srna.texs)
        if srna_datas["best"] == 0:
            continue
        if srna["utr"] in ("5utr", "interCDS"):
            start, end = srna_datas["start"], srna_datas["end"]
        elif srna["utr"] == "3utr":
            start, end = srna["start"], srna["end"]
        # NOTE(review): any other "utr" value reuses the coordinates of
        # the previous candidate (or fails on the first one) -- confirm
        # that no other value can occur.
        length = math.fabs(start - end)
        if args_srna.min_len <= length <= args_srna.max_len:
            print_file(num, srna, start, end, srna_datas, args_srna)
            num += 1