def __init__(self, args):
    """Store the parsed arguments and build the shared helper objects.

    When *args* carries more than three attributes, ``args.project_path``
    must exist on disk; otherwise an error is printed and the process
    exits. In that case a ``Paths`` object for the project is created too.
    """
    self._args = args
    has_project_args = len(vars(args)) > 3
    if has_project_args:
        project_exists = os.path.exists(args.project_path)
        if not project_exists:
            print("Error: --project_path does not exists!")
            sys.exit()
        self._paths = Paths(args.project_path)
    self.args_container = ArgsContainer()
    self.helper = Helper()
def utr_derived_srna(args_srna, libs, texs, wig_fs, wig_rs):
    """Detect UTR-derived sRNAs and write them as GFF3 plus a table.

    Args:
        args_srna: argument container; ``output_file``, ``output_table``
            and ``out_folder`` are read here, the rest is consumed by the
            helper functions.
        libs: unused in this function; kept for interface compatibility.
        texs: forwarded to ``ArgsContainer.extend_utr_container``.
        wig_fs: forward-strand coverage, forwarded to ``class_utr``.
        wig_rs: reverse-strand coverage, forwarded to ``class_utr``.
    """
    inters = []
    cdss, tas, tsss, pros, seq = read_data(args_srna)
    # Context managers guarantee both outputs are closed even when one of
    # the detection steps below raises.
    with open(args_srna.output_file, "w") as out, \
            open(args_srna.output_table, "w") as out_t:
        out.write("##gff-version 3\n")
        get_terminal(cdss, inters, seq, "start")
        get_inter(cdss, inters)
        get_terminal(cdss, inters, seq, "end")
        inters = sorted(inters, key=lambda k: (k["strain"], k["start"],
                                               k["end"], k["strand"]))
        args_srna = ArgsContainer().extend_utr_container(
            args_srna, cdss, tsss, pros, out, out_t, texs)
        for inter in inters:
            for ta in tas:
                # Only compare regions and transcripts that share the
                # same sequence and strand.
                if (inter["strain"] == ta.seq_id) and (
                        inter["strand"] == ta.strand):
                    class_utr(inter, ta, args_srna, wig_fs, wig_rs)
        covers = get_utr_coverage(args_srna.utrs)
        mediandict = set_cutoff(covers, args_srna)
        print_median(args_srna.out_folder, mediandict)
        detect_srna(mediandict, args_srna)
        # Kept from the original flow: the container exposes the handles
        # as ``out``/``out_t``. Closing a file twice is a no-op, so the
        # ``with`` exit below stays safe.
        args_srna.out.close()
        args_srna.out_t.close()
    paras = [
        args_srna.srnas, args_srna.utrs, seq, inters, tas, cdss, tas,
        tsss, pros, covers
    ]
    free_memory(paras)
def intergenic_srna(args_srna, libs, texs, wigs_f, wigs_r, tss_file):
    '''Get intergenic and antisense sRNA.

    Every transcript returned by ``read_data`` is compared with the CDSs.
    Transcripts overlapping a CDS are skipped unless ``args_srna.in_cds``
    is set; the remaining ones are checked with the intergenic or the
    antisense cutoff, depending on ``detects["anti"]``.

    Args:
        args_srna: argument container; ``output_file``, ``output_table``
            and ``in_cds`` are read here.
        libs: unused in this function; kept for interface compatibility.
        texs: forwarded to ``ArgsContainer.extend_inter_container``.
        wigs_f: forward-strand coverage for ``check_srna_condition``.
        wigs_r: reverse-strand coverage for ``check_srna_condition``.
        tss_file: file handed to ``read_tss``.
    '''
    inter_cutoff_coverage, inter_notex = get_intergenic_antisense_cutoff(
        args_srna)
    anti_cutoff_coverage, anti_notex = get_intergenic_antisense_cutoff(
        args_srna)
    nums, cdss, tas, pros, genes, ncs = read_data(args_srna)
    tsss, num_tss = read_tss(tss_file)
    detects = {"overlap": False, "uni_with_tss": False, "anti": False}
    # Context managers guarantee both outputs are closed even when a
    # helper raises part-way through the loop.
    with open(args_srna.output_file, "w") as output, \
            open(args_srna.output_table, "w") as out_table:
        output.write("##gff-version 3\n")
        for ta in tas:
            # Reset the per-transcript flags before each comparison.
            detects["overlap"] = False
            detects["anti"] = False
            compare_ta_cds(cdss, ta, detects)
            if detects["overlap"] and (not args_srna.in_cds):
                continue
            if detects["anti"]:
                cutoff_coverage = anti_cutoff_coverage
                notex = anti_notex
            else:
                cutoff_coverage = inter_cutoff_coverage
                notex = inter_notex
            args_srna = ArgsContainer().extend_inter_container(
                args_srna, tsss, pros, nums, output, out_table, texs,
                detects, cutoff_coverage, notex)
            check_srna_condition(ta, args_srna, cdss, wigs_f, wigs_r)
    paras = [tsss, tas, pros, genes, cdss]
    free_memory(paras)
def __init__(self, args_srna):
    """Create the helper objects and derive every working path.

    All output and temporary locations live below
    ``args_srna.out_folder``. Optional input folders are mapped to their
    ``tmp`` sub-folder through ``self._check_folder_exist``.
    """
    self.args_container = ArgsContainer()
    self.helper = Helper()
    self.multiparser = Multiparser()

    def in_out(name):
        # Path of *name* inside the output folder.
        return os.path.join(args_srna.out_folder, name)

    self.gff_output = in_out("gffs")
    self.table_output = in_out("tables")
    self.stat_path = in_out("statistics")
    self.tss_path = self._check_folder_exist(args_srna.tss_folder)
    self.pro_path = self._check_folder_exist(args_srna.pro_folder)
    self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
    self.fasta_path = os.path.join(args_srna.fastas, "tmp")
    self.tran_path = os.path.join(args_srna.trans, "tmp")
    self.term_path = self._check_folder_exist(args_srna.terms)
    self.merge_wigs = in_out("merge_wigs")
    # Prefixes of the intermediate files; the genome prefix is appended
    # by the callers.
    prefix_names = [
        ("merge", "tmp_merge"),
        ("utr", "tmp_utrsrna"),
        ("normal", "tmp_normal"),
        ("in_cds", "tmp_incds"),
        ("merge_table", "tmp_merge_table"),
        ("utr_table", "tmp_utrsrna_table"),
        ("normal_table", "tmp_normal_table"),
        ("in_cds_table", "tmp_incds_table"),
        ("basic", "tmp_basic"),
        ("energy", "tmp_energy"),
    ]
    self.prefixs = {key: in_out(name) for key, name in prefix_names}
    self.tmps = {key: in_out(name)
                 for key, name in [("nr", "tmp_nr"), ("srna", "tmp_sRNA")]}
    self.best_table = os.path.join(self.table_output, "best")
    self.all_best = {
        "all_gff": os.path.join(self.gff_output, "all_candidates"),
        "best_gff": os.path.join(self.gff_output, "best"),
        "all_table": os.path.join(self.table_output, "all_candidates"),
        "best_table": os.path.join(self.table_output, "best"),
    }
def __init__(self, args_srna):
    """Build helper objects and all output/temporary path attributes.

    Paths are rooted at ``args_srna.out_folder``; optional annotation
    folders are resolved with ``self._check_folder_exist``.
    """
    self.args_container = ArgsContainer()
    self.helper = Helper()
    self.multiparser = Multiparser()
    out_dir = args_srna.out_folder
    join = os.path.join
    self.gff_output = join(out_dir, "gffs")
    self.table_output = join(out_dir, "tables")
    self.stat_path = join(out_dir, "statistics")
    self.tss_path = self._check_folder_exist(args_srna.tss_folder)
    self.pro_path = self._check_folder_exist(args_srna.pro_folder)
    self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
    self.fasta_path = join(args_srna.fastas, "tmp")
    self.tran_path = join(args_srna.trans, "tmp")
    self.term_path = self._check_folder_exist(args_srna.terms)
    self.merge_wigs = join(out_dir, "merge_wigs")
    # Intermediate-file prefixes (GFF variants first, then tables).
    self.prefixs = {}
    self.prefixs["merge"] = join(out_dir, "tmp_merge")
    self.prefixs["utr"] = join(out_dir, "tmp_utrsrna")
    self.prefixs["normal"] = join(out_dir, "tmp_normal")
    self.prefixs["in_cds"] = join(out_dir, "tmp_incds")
    self.prefixs["merge_table"] = join(out_dir, "tmp_merge_table")
    self.prefixs["utr_table"] = join(out_dir, "tmp_utrsrna_table")
    self.prefixs["normal_table"] = join(out_dir, "tmp_normal_table")
    self.prefixs["in_cds_table"] = join(out_dir, "tmp_incds_table")
    self.prefixs["basic"] = join(out_dir, "tmp_basic")
    self.prefixs["energy"] = join(out_dir, "tmp_energy")
    self.tmps = dict(nr=join(out_dir, "tmp_nr"),
                     srna=join(out_dir, "tmp_sRNA"))
    self.best_table = join(self.table_output, "best")
    # Final locations of the complete and the filtered ("best") results.
    self.all_best = dict(
        all_gff=join(self.gff_output, "all_candidates"),
        best_gff=join(self.gff_output, "best"),
        all_table=join(self.table_output, "all_candidates"),
        best_table=join(self.table_output, "best"))
class sRNADetection(object):
    """Run the sRNA detection workflow and its optional filters.

    The class chains intergenic/antisense and UTR-derived detection,
    secondary-structure plotting, BLAST searches, classification and
    table generation. It also manages the temporary and final paths
    below ``args_srna.out_folder``.
    """

    # Extensions recognised as fasta files when formatting a database.
    _FASTA_EXTS = (".fa", ".fna", ".fasta")

    def __init__(self, args_srna):
        """Create the helper objects and derive every working path.

        Input folders that were not provided (``None``) map to ``None``
        instead of raising, so the missing-input checks in
        ``_check_necessary_file`` can report the problem.
        """
        self.args_container = ArgsContainer()
        self.helper = Helper()
        self.multiparser = Multiparser()
        out_folder = args_srna.out_folder
        self.gff_output = os.path.join(out_folder, "gffs")
        self.table_output = os.path.join(out_folder, "tables")
        self.stat_path = os.path.join(out_folder, "statistics")
        self.tss_path = self._check_folder_exist(args_srna.tss_folder)
        self.pro_path = self._check_folder_exist(args_srna.pro_folder)
        self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
        # None-safe: os.path.join(None, "tmp") would raise TypeError here.
        self.fasta_path = self._check_folder_exist(args_srna.fastas)
        self.tran_path = self._check_folder_exist(args_srna.trans)
        self.term_path = self._check_folder_exist(args_srna.terms)
        self.merge_wigs = os.path.join(out_folder, "merge_wigs")
        # Prefixes of intermediate files; callers append "_<prefix>".
        self.prefixs = {
            "merge": os.path.join(out_folder, "tmp_merge"),
            "utr": os.path.join(out_folder, "tmp_utrsrna"),
            "normal": os.path.join(out_folder, "tmp_normal"),
            "in_cds": os.path.join(out_folder, "tmp_incds"),
            "merge_table": os.path.join(out_folder, "tmp_merge_table"),
            "utr_table": os.path.join(out_folder, "tmp_utrsrna_table"),
            "normal_table": os.path.join(out_folder, "tmp_normal_table"),
            "in_cds_table": os.path.join(out_folder, "tmp_incds_table"),
            "basic": os.path.join(out_folder, "tmp_basic"),
            "energy": os.path.join(out_folder, "tmp_energy"),
        }
        self.tmps = {
            "nr": os.path.join(out_folder, "tmp_nr"),
            "srna": os.path.join(out_folder, "tmp_sRNA"),
        }
        self.best_table = os.path.join(self.table_output, "best")
        self.all_best = {
            "all_gff": os.path.join(self.gff_output, "all_candidates"),
            "best_gff": os.path.join(self.gff_output, "best"),
            "all_table": os.path.join(self.table_output, "all_candidates"),
            "best_table": os.path.join(self.table_output, "best"),
        }

    def _check_folder_exist(self, folder):
        """Return ``<folder>/tmp``, or ``None`` when *folder* is ``None``."""
        if folder is None:
            return None
        return os.path.join(folder, "tmp")

    def _check_gff(self, gffs):
        """Run ``check_uni_attributes`` on every ``.gff`` file in *gffs*."""
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_format(self, blast_path, database, type_, db_file, err):
        """Call ``makeblastdb``; its stderr is redirected to *err*."""
        call([os.path.join(blast_path, "makeblastdb"), "-in", database,
              "-dbtype", type_, "-out", db_file], stderr=err)

    def _formatdb(self, database, type_, out_folder, blast_path,
                  database_type):
        """Format *database* for BLAST, logging to ``<out_folder>/log.txt``.

        If *database* does not end with a fasta extension it is treated
        as a prefix: a fasta file in the same folder whose name without
        the last extension equals the prefix is used instead. For
        ``database_type == "sRNA"`` the fasta file is first rewritten in
        place through ``change_format``.
        """
        with open(os.path.join(out_folder, "log.txt"), "w") as err:
            if not database.endswith(self._FASTA_EXTS):
                # A bare name has no folder part; search the current
                # directory instead of calling os.listdir("").
                folder = os.path.dirname(database) or os.curdir
                filename = os.path.basename(database)
                for fasta in os.listdir(folder):
                    if fasta.endswith(self._FASTA_EXTS) and (
                            ".".join(fasta.split(".")[:-1]) == filename):
                        database = os.path.join(folder, fasta)
            if database_type == "sRNA":
                change_format(database, "tmp_srna_database")
                os.remove(database)
                shutil.move("tmp_srna_database", database)
            db_file = ".".join(database.split(".")[:-1])
            self._run_format(blast_path, database, type_, db_file, err)

    def _merge_frag_tex_file(self, files, args_srna):
        """Merge the fragmented and TEX results into the merge files.

        With both library types the fragmented files are merged into the
        TEX files before moving/sorting; with only one type its files
        are moved/sorted directly.
        """
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            self.helper.merge_file(files["frag_gff"], files["tex_gff"])
            self.helper.merge_file(files["frag_csv"], files["tex_csv"])
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
            os.remove(files["frag_csv"])
            os.remove(files["frag_gff"])
            os.remove(files["tex_gff"])
        elif args_srna.frag_wigs is not None:
            shutil.move(files["frag_csv"], files["merge_csv"])
            self.helper.sort_gff(files["frag_gff"], files["merge_gff"])
            os.remove(files["frag_gff"])
        elif args_srna.tex_wigs is not None:
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])

    def _run_normal(self, prefix, gff, tran, fuzzy_tss, args_srna):
        """Run intergenic/antisense detection for one genome *prefix*.

        Returns:
            The TSS file used: the one in ``<out_folder>/TSS_class`` when
            that folder exists, otherwise the file looked up in
            ``self.tss_path`` (``None`` when "tss" is not in
            ``import_info``).
        """
        if "tmp_cutoff_inter" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_cutoff_inter"))
        files = {"frag_gff": None, "frag_csv": None,
                 "tex_gff": None, "tex_csv": None,
                 "merge_gff": None, "merge_csv": None}
        if "tss" in args_srna.import_info:
            tss = self.helper.get_correct_file(self.tss_path, "_TSS.gff",
                                               prefix, None, None)
        else:
            tss = None
        if self.pro_path is not None:
            pro = self.helper.get_correct_file(
                self.pro_path, "_processing.gff", prefix, None, None)
        else:
            pro = None
        if args_srna.frag_wigs is not None:
            files["frag_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_frag", prefix]))
            files["frag_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_frag_table", prefix]))
            args_srna = self.args_container.container_intersrna(
                "frag", files, args_srna, prefix,
                os.path.join(args_srna.gffs, gff), tran, tss, pro,
                fuzzy_tss)
            intergenic_srna(args_srna)
        if args_srna.tex_wigs is not None:
            files["tex_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_tex", prefix]))
            files["tex_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_tex_table", prefix]))
            args_srna = self.args_container.container_intersrna(
                "tex", files, args_srna, prefix,
                os.path.join(args_srna.gffs, gff), tran, tss, pro,
                fuzzy_tss)
            intergenic_srna(args_srna)
        files["merge_csv"] = "_".join([self.prefixs["normal_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["normal"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        if "TSS_class" in os.listdir(args_srna.out_folder):
            tss = os.path.join(args_srna.out_folder, "TSS_class",
                               prefix + "_TSS.gff")
        return tss

    def _run_utrsrna(self, gff, tran, prefix, tss, pro, args_srna):
        """Run UTR-derived detection for *prefix*, then merge and filter."""
        if "tmp_median" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
        files = {"frag_gff": None, "frag_csv": None,
                 "tex_gff": None, "tex_csv": None,
                 "merge_gff": None, "merge_csv": None}
        if args_srna.tex_wigs is not None:
            files["tex_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_tex", prefix]))
            files["tex_csv"] = os.path.join(
                args_srna.out_folder,
                "_".join(["tmp_utr_tex_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                os.path.join(args_srna.gffs, gff), tran, tss, files, pro,
                os.path.join(self.fasta_path, prefix + ".fa"), "tex",
                prefix, args_srna)
            utr_derived_srna(args_srna)
        if args_srna.frag_wigs is not None:
            files["frag_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_frag", prefix]))
            files["frag_csv"] = os.path.join(
                args_srna.out_folder,
                "_".join(["tmp_utr_frag_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                os.path.join(args_srna.gffs, gff), tran, tss, files, pro,
                os.path.join(self.fasta_path, prefix + ".fa"), "frag",
                prefix, args_srna)
            utr_derived_srna(args_srna)
        files["merge_csv"] = "_".join([self.prefixs["utr_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["utr"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        filter_utr(files["merge_gff"], files["merge_csv"], args_srna.min_utr)

    def _check_necessary_file(self, args_srna):
        """Validate the required inputs and parse/combine the given ones.

        Prints an error and exits when annotation, transcript or wiggle
        input is missing, when UTR-derived detection lacks TSS files, or
        when a fasta-dependent step is requested without fasta files.
        """
        if (args_srna.gffs is None) or (args_srna.trans is None) or (
                (args_srna.tex_wigs is None) and (
                    args_srna.frag_wigs is None)):
            print("Error: lack required files!!!!")
            sys.exit()
        if args_srna.utr_srna:
            if args_srna.tss_folder is None:
                print("Error: lack required TSS files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            if args_srna.pro_folder is None:
                print("Warning: lack Processing site files for UTR "
                      "derived sRNA detection!!!")
                print("it may effect the results!!!!")
        self._check_gff(args_srna.gffs)
        self._check_gff(args_srna.trans)
        if args_srna.tss_folder is not None:
            self._check_gff(args_srna.tss_folder)
            self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
            self.multiparser.combine_gff(args_srna.gffs, self.tss_path,
                                         None, "TSS")
        if args_srna.pro_folder is not None:
            self._check_gff(args_srna.pro_folder)
            self.multiparser.parser_gff(args_srna.pro_folder, "processing")
            self.multiparser.combine_gff(args_srna.gffs, self.pro_path,
                                         None, "processing")
        if args_srna.sorf_file is not None:
            self._check_gff(args_srna.sorf_file)
            self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
            self.multiparser.combine_gff(args_srna.gffs, self.sorf_path,
                                         None, "sORF")
        if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
                "blast_nr" in args_srna.import_info) or (
                "blast_srna" in args_srna.import_info):
            if args_srna.fastas is None:
                print("Error: lack required fasta files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            self.multiparser.parser_fasta(args_srna.fastas)
            self.multiparser.combine_fasta(args_srna.gffs,
                                           self.fasta_path, None)
        if args_srna.terms is not None:
            self._check_gff(args_srna.terms)
            self.multiparser.parser_gff(args_srna.terms, "term")
            self.multiparser.combine_gff(args_srna.gffs, self.term_path,
                                         None, "term")
        else:
            self.term_path = None

    def _run_program(self, args_srna):
        """Detect sRNAs for every ``.gff`` file in ``args_srna.gffs``.

        Returns:
            The list of genome prefixes (file names without ``.gff``).
        """
        prefixs = []
        tss = None
        for gff in os.listdir(args_srna.gffs):
            if not gff.endswith(".gff"):
                continue
            prefix = gff.replace(".gff", "")
            prefixs.append(prefix)
            print("Running sRNA detection of {0}....".format(prefix))
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            gffs = {"merge": "_".join([self.prefixs["merge"], prefix]),
                    "utr": "_".join([self.prefixs["utr"], prefix]),
                    "normal": "_".join([self.prefixs["normal"], prefix])}
            csvs = {"merge": "_".join([
                        self.prefixs["merge_table"], prefix]),
                    "utr": "_".join([self.prefixs["utr_table"], prefix]),
                    "normal": "_".join([
                        self.prefixs["normal_table"], prefix])}
            tss = self._run_normal(
                prefix, gff, tran, args_srna.fuzzy_tsss["inter"], args_srna)
            if args_srna.utr_srna:
                print("Running UTR derived sRNA detection of {0}".format(
                    prefix))
                if tss is None:
                    tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
                if self.pro_path is not None:
                    pro = self.helper.get_correct_file(
                        self.pro_path, "_processing.gff",
                        prefix, None, None)
                else:
                    pro = None
                if tss is not None:
                    self._run_utrsrna(gff, tran, prefix,
                                      tss, pro, args_srna)
            self._merge_srna(args_srna, gffs, csvs, prefix,
                             os.path.join(args_srna.gffs, gff), tss)
            filter_frag(csvs["merge"], gffs["merge"])
            self.helper.sort_gff(gffs["merge"],
                                 "_".join([self.prefixs["basic"], prefix]))
        return prefixs

    def _merge_srna(self, args_srna, gffs, csvs, prefix, gff_file, tss):
        """Merge the intergenic and UTR-derived results of *prefix*."""
        print("merging data of intergenic and UTR_derived sRNA...")
        merge_srna_gff(gffs, args_srna.in_cds,
                       args_srna.cutoff_overlap, gff_file)
        merge_srna_table(gffs["merge"], csvs,
                         os.path.join(args_srna.wig_path,
                                      "_".join([prefix, "forward.wig"])),
                         os.path.join(args_srna.wig_path,
                                      "_".join([prefix, "reverse.wig"])),
                         tss, args_srna)

    def _run_RNAfold(self, seq_file, vienna_path, sec_file):
        """Run ``RNAfold -p`` with *seq_file* as stdin, output to *sec_file*.

        Uses an argument list instead of a shell string so paths that
        contain spaces or shell metacharacters are handled correctly.
        """
        with open(seq_file) as seq_in, open(sec_file, "w") as sec_out:
            call([os.path.join(vienna_path, "RNAfold"), "-p"],
                 stdin=seq_in, stdout=sec_out)

    def _get_seq_sec(self, fasta_path, out_folder, prefix, sec_path,
                     dot_path, vienna_path):
        """Extract sRNA sequences, fold them and collect the energies.

        The working directory is changed to ``<out_folder>/tmp_srna``
        and is NOT restored here; the caller switches back using the
        returned ``"main"`` path.

        Returns:
            dict with the absolute ``sec``/``dot`` plot folders, the
            original working directory (``main``) and the temporary
            folder (``tmp``).
        """
        for fasta in os.listdir(fasta_path):
            if fasta.endswith(".fa") and (
                    fasta.replace(".fa", "") == prefix):
                break
        else:
            print("Error:There is not fasta file of {0}".format(prefix))
            print("please check your imported information")
            sys.exit()
        seq_file = os.path.join(out_folder, "_".join(["sRNA_seq", prefix]))
        sec_file = os.path.join(out_folder, "_".join(["sRNA_2d", prefix]))
        self.helper.get_seq("_".join([self.prefixs["basic"], prefix]),
                            os.path.join(fasta_path, fasta), seq_file)
        tmp_path = os.path.join(out_folder, "tmp_srna")
        self.helper.check_make_folder(tmp_path)
        main_path = os.getcwd()
        os.chdir(tmp_path)
        # Relative paths must be anchored to the original directory now
        # that the working directory has changed.
        sec_file = os.path.join(main_path, sec_file)
        seq_file = os.path.join(main_path, seq_file)
        tmp_sec_path = os.path.join(main_path, sec_path)
        tmp_dot_path = os.path.join(main_path, dot_path)
        self._run_RNAfold(seq_file, vienna_path, sec_file)
        extract_energy(os.path.join(main_path,
                       "_".join([self.prefixs["basic"], prefix])),
                       sec_file, os.path.join(main_path,
                       "_".join([self.prefixs["energy"], prefix])))
        for ps in os.listdir(os.getcwd()):
            new_ps = ps.replace("|", "_")
            if new_ps != ps:
                shutil.move(ps, new_ps)
        return {"sec": tmp_sec_path, "dot": tmp_dot_path,
                "main": main_path, "tmp": os.path.join(main_path, tmp_path)}

    def _run_replot(self, vienna_util, tmp_paths, file_, dot_file, rel_file):
        """Run ``relplot.pl`` on a structure/dot-plot pair into *rel_file*.

        All three files are located in ``tmp_paths["tmp"]``.
        """
        tmp = tmp_paths["tmp"]
        with open(os.path.join(tmp, rel_file), "w") as out:
            call([os.path.join(vienna_util, "relplot.pl"),
                  os.path.join(tmp, file_),
                  os.path.join(tmp, dot_file)], stdout=out)

    def _convert_pdf(self, ps2pdf14_path, tmp_paths, file_, pdf_file):
        """Convert one PostScript file of the tmp folder to *pdf_file*."""
        call([ps2pdf14_path, os.path.join(tmp_paths["tmp"], file_),
              pdf_file])

    def _replot_sec_to_pdf(self, vienna_util, tmp_paths, ps2pdf14_path,
                           prefix):
        """Replot the structures, convert the plots to PDF and file them.

        ``*rss.pdf`` files go to ``<sec>/<prefix>`` and ``*dp.pdf`` files
        to ``<dot>/<prefix>``.
        """
        for file_ in os.listdir(os.getcwd()):
            if file_.endswith("ss.ps"):
                dot_file = file_.replace("ss.ps", "dp.ps")
                rel_file = file_.replace("ss.ps", "rss.ps")
                print("replot {0}".format(file_))
                self._run_replot(vienna_util, tmp_paths, file_,
                                 dot_file, rel_file)
        for file_ in os.listdir(tmp_paths["tmp"]):
            if file_.endswith(("rss.ps", "dp.ps")):
                pdf_file = file_.replace(".ps", ".pdf")
                print("convert {0} to pdf".format(file_))
                self._convert_pdf(ps2pdf14_path, tmp_paths,
                                  file_, pdf_file)
        os.mkdir(os.path.join(tmp_paths["sec"], prefix))
        os.mkdir(os.path.join(tmp_paths["dot"], prefix))
        self.helper.move_all_content(
            tmp_paths["tmp"], os.path.join(tmp_paths["sec"], prefix),
            ["rss.pdf"])
        self.helper.move_all_content(
            tmp_paths["tmp"], os.path.join(tmp_paths["dot"], prefix),
            ["dp.pdf"])

    def _run_mountain(self, vienna_util, tmp_paths, dot_file, out):
        """Run ``mountain.pl`` on *dot_file*, writing stdout to *out*."""
        call([os.path.join(vienna_util, "mountain.pl"),
              os.path.join(tmp_paths["tmp"], dot_file)], stdout=out)

    def _plot_mountain(self, mountain, moun_path, tmp_paths, prefix,
                       vienna_util):
        """Create a mountain plot per dot plot when *mountain* is true."""
        if not mountain:
            return
        tmp_moun_path = os.path.join(tmp_paths["main"], moun_path)
        os.mkdir(os.path.join(tmp_moun_path, prefix))
        txt_path = os.path.join(tmp_paths["tmp"], "tmp_txt")
        self.helper.check_make_folder(txt_path)
        print("Generating mountain plot of {0}....".format(prefix))
        for dot_file in os.listdir(tmp_paths["tmp"]):
            if not dot_file.endswith("dp.ps"):
                continue
            moun_txt = os.path.join(tmp_paths["tmp"], "mountain.txt")
            moun_file = dot_file.replace("dp.ps", "mountain.pdf")
            print("Generating {0}".format(moun_file))
            # Close the text file before it is read for plotting.
            with open(moun_txt, "w") as out:
                self._run_mountain(vienna_util, tmp_paths, dot_file, out)
            plot_mountain_plot(moun_txt, moun_file)
            shutil.move(moun_file,
                        os.path.join(tmp_moun_path, prefix, moun_file))
            os.remove(moun_txt)

    def _compute_2d_and_energy(self, args_srna, prefixs):
        """Compute structures/energies and write the plots per prefix.

        Afterwards the ``basic`` file of each prefix is replaced by the
        ``energy`` file produced by ``extract_energy``.
        """
        print("Running energy calculation....")
        moun_path = os.path.join(args_srna.out_folder, "mountain_plot")
        sec_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "sec_plot")
        dot_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "dot_plot")
        self.helper.remove_all_content(sec_path, None, "dir")
        self.helper.remove_all_content(dot_path, None, "dir")
        self.helper.remove_all_content(moun_path, None, "dir")
        for prefix in prefixs:
            tmp_paths = self._get_seq_sec(
                self.fasta_path, args_srna.out_folder, prefix, sec_path,
                dot_path, args_srna.vienna_path)
            self._replot_sec_to_pdf(args_srna.vienna_util, tmp_paths,
                                    args_srna.ps2pdf14_path, prefix)
            self._plot_mountain(args_srna.mountain, moun_path,
                                tmp_paths, prefix, args_srna.vienna_util)
            self.helper.remove_all_content(os.getcwd(), ".ps", "file")
            # _get_seq_sec changed the working directory; go back.
            os.chdir(tmp_paths["main"])
            shutil.move("_".join([self.prefixs["energy"], prefix]),
                        "_".join([self.prefixs["basic"], prefix]))
            shutil.rmtree(os.path.join(args_srna.out_folder, "tmp_srna"))

    def _run_blast(self, blast_path, program, database, e, seq_file,
                   blast_file, strand):
        """Run the BLAST *program* for *seq_file* against *database*."""
        call([os.path.join(blast_path, program), "-db", database,
              "-evalue", str(e), "-strand", strand, "-query", seq_file,
              "-out", blast_file])

    def _get_strand_fasta(self, seq_file, out_folder):
        """Split *seq_file* into plus- and minus-strand fasta files.

        The strand is the last character of each header line (``+`` or
        ``-``). Records whose header ends with neither are dropped
        instead of being appended to the previous record.

        Returns:
            tuple ``(tmp_plus, tmp_minus)`` with the two file paths.
        """
        tmp_plus = os.path.join(out_folder, "tmp_plus.fa")
        tmp_minus = os.path.join(out_folder, "tmp_minus.fa")
        strand = ""
        with open(tmp_plus, "w") as out_p, \
                open(tmp_minus, "w") as out_m, \
                open(seq_file) as sh:
            for line in sh:
                line = line.strip()
                if line.startswith(">"):
                    if line[-1] == "+":
                        out_p.write(line + "\n")
                        strand = "plus"
                    elif line[-1] == "-":
                        out_m.write(line + "\n")
                        strand = "minus"
                    else:
                        # Unknown strand: skip this record's sequence.
                        strand = ""
                elif strand == "plus":
                    out_p.write(line + "\n")
                elif strand == "minus":
                    out_m.write(line + "\n")
        return tmp_plus, tmp_minus

    def _blast(self, database, database_format, data_type, args_srna,
               prefixs, program, database_type, e):
        """BLAST the sRNAs of every prefix and fold the hits back in.

        For ``database_type == "nr"`` the two strands are searched
        separately and the results are merged; otherwise both strands
        are searched in one run.
        """
        if database is None:
            print("Error: No database assigned!")
            return
        if database_format:
            self._formatdb(database, data_type, args_srna.out_folder,
                           args_srna.blast_path, database_type)
        for prefix in prefixs:
            blast_file = os.path.join(
                args_srna.out_folder, "blast_result_and_misc",
                "_".join([database_type, "blast", prefix + ".txt"]))
            srna_file = "_".join([self.prefixs["basic"], prefix])
            out_file = os.path.join(
                args_srna.out_folder,
                "_".join(["tmp", database_type, prefix]))
            print("Running Blast of {0}".format(prefix))
            seq_file = os.path.join(
                args_srna.out_folder, "_".join(["sRNA_seq", prefix]))
            # NOTE(review): seq_file is a joined path while os.listdir
            # yields bare names, so this test is always true and the
            # sequences are always regenerated. Kept as is: skipping the
            # extraction could reuse a stale file from an earlier run.
            if seq_file not in os.listdir(args_srna.out_folder):
                self.helper.get_seq(
                    srna_file,
                    os.path.join(self.fasta_path, prefix + ".fa"),
                    seq_file)
            if database_type == "nr":
                tmp_plus, tmp_minus = self._get_strand_fasta(
                    seq_file, args_srna.out_folder)
                tmp_blast = "tmp_blast.txt"
                self._run_blast(args_srna.blast_path, program, database,
                                e, tmp_plus, tmp_blast, "plus")
                self._run_blast(args_srna.blast_path, program, database,
                                e, tmp_minus, blast_file, "minus")
                self.helper.merge_file(tmp_blast, blast_file)
                os.remove(tmp_blast)
                os.remove(tmp_plus)
                os.remove(tmp_minus)
            else:
                self._run_blast(args_srna.blast_path, program, database,
                                e, seq_file, blast_file, "both")
            extract_blast(blast_file, srna_file, out_file,
                          out_file + ".csv", database_type)
            shutil.move(out_file, srna_file)

    def _class_srna(self, prefixs, args_srna):
        """Classify the sRNAs of every prefix and write per-class tables."""
        # NOTE(review): this condition is always true (a length cannot
        # equal 0 and 1 at once); the intended guard is unclear, so it
        # is left unchanged.
        if (len(args_srna.import_info) != 1) or (
                len(args_srna.import_info) != 0):
            for prefix in prefixs:
                print("classifying sRNA of {0}".format(prefix))
                class_gff = os.path.join(self.gff_output, "for_class")
                class_table = os.path.join(self.table_output, "for_class")
                self.helper.check_make_folder(os.path.join(class_table,
                                                           prefix))
                self.helper.check_make_folder(os.path.join(class_gff,
                                                           prefix))
                class_gff = os.path.join(class_gff, prefix)
                class_table = os.path.join(class_table, prefix)
                self.helper.check_make_folder(class_table)
                self.helper.check_make_folder(class_gff)
                out_stat = os.path.join(
                    self.stat_path, "_".join([
                        "stat_sRNA_class", prefix + ".csv"]))
                classify_srna(os.path.join(self.all_best["all_gff"],
                              "_".join([prefix, "sRNA.gff"])),
                              class_gff, out_stat, args_srna)
                for srna in os.listdir(class_gff):
                    out_table = os.path.join(
                        class_table, srna.replace(".gff", ".csv"))
                    gen_srna_table(
                        os.path.join(class_gff, srna),
                        "_".join([self.prefixs["merge_table"], prefix]),
                        "_".join([self.tmps["nr"], prefix + ".csv"]),
                        "_".join([self.tmps["srna"], prefix + ".csv"]),
                        args_srna, out_table)

    def _get_best_result(self, prefixs, args_srna):
        """Write the "best" GFF and table for every prefix."""
        for prefix in prefixs:
            best_gff = os.path.join(self.all_best["best_gff"],
                                    "_".join([prefix, "sRNA.gff"]))
            best_table = os.path.join(self.all_best["best_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            gen_best_srna(os.path.join(self.all_best["all_gff"],
                                       "_".join([prefix, "sRNA.gff"])),
                          best_gff, args_srna)
            gen_srna_table(os.path.join(self.all_best["best_gff"],
                           "_".join([prefix, "sRNA.gff"])),
                           "_".join([self.prefixs["merge_table"], prefix]),
                           "_".join([self.tmps["nr"], prefix + ".csv"]),
                           "_".join([self.tmps["srna"], prefix + ".csv"]),
                           args_srna, best_table)

    def _remove_file(self, args_srna):
        """Delete the temporary files and folders created by the run."""
        self.helper.remove_all_content(args_srna.out_folder, "tmp_", "dir")
        self.helper.remove_all_content(args_srna.out_folder, "tmp_", "file")
        # fastas is optional, so only clean it up when it was given.
        if args_srna.fastas is not None:
            self.helper.remove_tmp(args_srna.fastas)
        self.helper.remove_tmp(args_srna.gffs)
        if args_srna.frag_wigs is not None:
            self.helper.remove_tmp(args_srna.frag_wigs)
        if args_srna.tex_wigs is not None:
            self.helper.remove_tmp(args_srna.tex_wigs)
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            shutil.rmtree(args_srna.merge_wigs)
        self.helper.remove_tmp(args_srna.trans)
        if args_srna.tss_folder is not None:
            self.helper.remove_tmp(args_srna.tss_folder)
        if args_srna.pro_folder is not None:
            self.helper.remove_tmp(args_srna.pro_folder)
        if args_srna.sorf_file is not None:
            self.helper.remove_tmp(args_srna.sorf_file)
        if "tmp_median" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
        if self.term_path is not None:
            self.helper.remove_tmp(args_srna.terms)

    def _filter_srna(self, args_srna, prefixs):
        """Run the filters that are requested in ``import_info``."""
        if "sec_str" in args_srna.import_info:
            self._compute_2d_and_energy(args_srna, prefixs)
        if "blast_nr" in args_srna.import_info:
            self._blast(args_srna.nr_database, args_srna.nr_format, "prot",
                        args_srna, prefixs, "blastx", "nr", args_srna.e_nr)
        if "blast_srna" in args_srna.import_info:
            self._blast(args_srna.srna_database, args_srna.srna_format,
                        "nucl", args_srna, prefixs, "blastn", "sRNA",
                        args_srna.e_srna)
        if "sorf" in args_srna.import_info:
            for prefix in prefixs:
                sorf_name = "_".join([prefix, "sORF.gff"])
                if sorf_name in os.listdir(self.sorf_path):
                    tmp_srna = os.path.join(
                        args_srna.out_folder,
                        "".join(["tmp_srna_sorf", prefix]))
                    tmp_sorf = os.path.join(
                        args_srna.out_folder,
                        "".join(["tmp_sorf_srna", prefix]))
                    srna_sorf_comparison(
                        "_".join([self.prefixs["basic"], prefix]),
                        os.path.join(self.sorf_path, sorf_name),
                        tmp_srna, tmp_sorf)
                    os.remove(tmp_sorf)
                    shutil.move(tmp_srna,
                                "_".join([self.prefixs["basic"], prefix]))

    def _import_info_format(self, import_info):
        """Return *import_info* with every entry lower-cased."""
        return [info.lower() for info in import_info]

    def _gen_table(self, prefixs, args_srna):
        """Generate the table of all candidates for every prefix."""
        for prefix in prefixs:
            out_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            gen_srna_table(os.path.join(self.all_best["all_gff"],
                           "_".join([prefix, "sRNA.gff"])),
                           "_".join([self.prefixs["merge_table"], prefix]),
                           "_".join([self.tmps["nr"], prefix + ".csv"]),
                           "_".join([self.tmps["srna"], prefix + ".csv"]),
                           args_srna, out_table)

    def _print_rank_all(self, prefixs):
        """Call ``print_rank_all`` on the all/best tables per prefix."""
        for prefix in prefixs:
            all_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            best_table = os.path.join(self.all_best["best_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            print_rank_all(all_table, best_table)

    def _filter_min_utr(self, prefixs, min_utr):
        """Apply ``filter_utr`` to the all-candidates output per prefix."""
        for prefix in prefixs:
            filter_utr(os.path.join(self.all_best["all_gff"],
                                    "_".join([prefix, "sRNA.gff"])),
                       os.path.join(self.all_best["all_table"],
                                    "_".join([prefix, "sRNA.csv"])),
                       min_utr)

    def _antisense(self, gffs, prefixs):
        """Run ``srna_antisense`` on the all and best results per prefix."""
        for prefix in prefixs:
            all_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            best_table = os.path.join(self.all_best["best_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            all_gff = os.path.join(self.all_best["all_gff"],
                                   "_".join([prefix, "sRNA.gff"]))
            best_gff = os.path.join(self.all_best["best_gff"],
                                    "_".join([prefix, "sRNA.gff"]))
            srna_antisense(all_gff, all_table,
                           os.path.join(gffs, prefix + ".gff"))
            srna_antisense(best_gff, best_table,
                           os.path.join(gffs, prefix + ".gff"))

    def _blast_stat(self, stat_path, srna_tables):
        """Write BLAST statistics for every table in ``<tables>/best``."""
        for srna_table in os.listdir(os.path.join(srna_tables, "best")):
            out_srna_blast = os.path.join(
                stat_path,
                "stat_" + srna_table.replace(".csv", "_blast.csv"))
            blast_class(os.path.join(srna_tables, "best", srna_table),
                        out_srna_blast)

    def _compare_term_promoter(self, out_table, prefix, args_srna):
        """Compare the sRNAs with terminators/promoters when requested."""
        if ("term" in args_srna.import_info) and (
                self.term_path is not None):
            compare_srna_term(os.path.join(self.all_best["all_gff"],
                              "_".join([prefix, "sRNA.gff"])),
                              out_table, os.path.join(self.term_path,
                              "_".join([prefix, "term.gff"])),
                              args_srna.fuzzy_b, args_srna.fuzzy_a)
        if ("promoter" in args_srna.import_info) and (
                args_srna.promoter_table is not None) and (
                "tss" in args_srna.import_info):
            compare_srna_promoter(os.path.join(self.all_best["all_gff"],
                                  "_".join([prefix, "sRNA.gff"])),
                                  out_table, args_srna)

    def run_srna_detection(self, args_srna):
        """Run the complete workflow: check, detect, filter, report, clean.

        ``args_srna.import_info`` is replaced by its lower-cased form.
        """
        # Normalise first so the input checks see the same lower-case
        # flags as the later steps.
        args_srna.import_info = self._import_info_format(
            args_srna.import_info)
        self._check_necessary_file(args_srna)
        self.multiparser.parser_gff(args_srna.trans, "transcript")
        self.multiparser.combine_gff(args_srna.gffs, self.tran_path,
                                     None, "transcript")
        prefixs = self._run_program(args_srna)
        self._filter_srna(args_srna, prefixs)
        for prefix in prefixs:
            shutil.copyfile("_".join([self.prefixs["basic"], prefix]),
                            os.path.join(self.all_best["all_gff"],
                            "_".join([prefix, "sRNA.gff"])))
            self._compare_term_promoter(
                "_".join([self.prefixs["merge_table"], prefix]),
                prefix, args_srna)
        self._gen_table(prefixs, args_srna)
        self._class_srna(prefixs, args_srna)
        self._get_best_result(prefixs, args_srna)
        self._print_rank_all(prefixs)
        if "blast_srna" in args_srna.import_info:
            self._blast_stat(self.stat_path, self.table_output)
        self._remove_file(args_srna)
class Controller(object):
    """Manage the actions of the subcommands.

    The Controller takes care of validating the command line arguments,
    creating the required project sub-folders, packing the arguments into
    the matching container and handing them to the module that does the
    actual work.
    """

    def __init__(self, args):
        """Create an instance.

        ``args`` is the parsed argument namespace. The project paths are
        only set up when the namespace holds more than three attributes;
        in that case ``args.project_path`` must exist.
        """
        self._args = args
        if len(args.__dict__) > 3:
            if not os.path.exists(args.project_path):
                print("Error: --project_path does not exists!")
                sys.exit()
            self._paths = Paths(args.project_path)
        self.args_container = ArgsContainer()
        self.helper = Helper()

    # ------------------------------------------------------------------
    # Generic validation helpers. All of them print an error message and
    # terminate the program via sys.exit() when a check fails.
    # ------------------------------------------------------------------

    def check_folder(self, folders, flags):
        '''Check for empty or wrongly assigned (required) folders.

        ``flags`` holds the option name reported for each folder.
        '''
        for folder, flag in zip(folders, flags):
            if (folder is None) or (not os.path.exists(folder)):
                print("Error: {0} is wrong. Please check it!".format(flag))
                sys.exit()
            if len(os.listdir(folder)) == 0:
                print("Error: {0} is a empty folder!".format(flag))
                sys.exit()

    def check_multi_files(self, input_files, flags):
        '''Check that every file of every given file list exists.

        ``input_files`` is a list of file lists (entries may be None and
        are then skipped); ``flags`` holds the option name per list.
        '''
        if input_files is None:
            return
        for files, flag in zip(input_files, flags):
            if files is None:
                continue
            for file_ in files:
                if not os.path.exists(file_):
                    print("Error: Some files in {0} do "
                          "not exist!".format(flag))
                    sys.exit()

    def check_parameter(self, paras, names):
        '''Check that none of the required parameters is None.'''
        for para, name in zip(paras, names):
            if para is None:
                print("Error: {0} is wrong. "
                      "Please check it!".format(name))
                sys.exit()

    def check_no_require_folder(self, folders):
        '''Check the folders which are not necessary.

        A folder may be None, but if it is assigned it must exist and
        must not be empty.
        '''
        for folder in folders:
            if folder is None:
                continue
            if not os.path.exists(folder):
                print("Error: There is a wrong folder. "
                      "Please check it!")
                sys.exit()
            if len(os.listdir(folder)) == 0:
                print("Error: There is a empty folder. "
                      "Please check it!")
                sys.exit()

    def check_execute_file(self, exe):
        '''Locate an executable and return the path that should be used.

        If ``exe`` is found in one of the folders of the PATH environment
        variable it is returned unchanged. Otherwise, if it exists as a
        plain path, its real path is returned. If it can not be found
        (or is None) an error is printed and the program exits.
        '''
        if exe is None:
            # os.path.exists(None) would raise a TypeError; report the
            # problem the same way as any other missing executable.
            print("Error: {0} can't be found!".format(exe))
            print("Please assign the correct path!")
            sys.exit()
        full_exe = None
        if os.path.exists(exe):
            full_exe = os.path.realpath(exe)
        # os.pathsep instead of a hard-coded ":" keeps this portable, and
        # .get() avoids a KeyError when PATH is not set at all.
        for folder in os.environ.get("PATH", "").split(os.pathsep):
            if os.path.exists(os.path.join(folder, exe)):
                full_exe = exe
                break
        if full_exe is None:
            print("Error: {0} can't be found!".format(exe))
            print("Please assign the correct path!")
            sys.exit()
        return full_exe

    def _resolve_executables(self, *props):
        '''Replace each named attribute of self._args by the result of
        check_execute_file() for its current value.'''
        for prop in props:
            setattr(self._args, prop,
                    self.check_execute_file(getattr(self._args, prop)))

    def check_file(self, files, names, require):
        '''Check the path of each file.

        ``names`` holds the option name reported for each file; a single
        string is accepted as well and is then used for all files. With
        ``require`` set, None entries are an error; otherwise None
        entries are skipped. Assigned entries must be regular files.
        '''
        if isinstance(names, str):
            # Indexing a bare string would report a single character.
            names = [names] * len(files)
        for i in range(len(files)):
            if files[i] is None:
                if require:
                    print("Error: {0} is wrong. "
                          "Please check it!".format(names[i]))
                    sys.exit()
            elif not os.path.isfile(files[i]):
                print("Error: There is a wrong path of {0}. "
                      "Please check it!".format(names[i]))
                sys.exit()

    # ------------------------------------------------------------------
    # Subcommands
    # ------------------------------------------------------------------

    def create_project(self, version):
        """Create a new project."""
        project_creator.create_root_folder(self._args.project_path)
        project_creator.create_subfolders(self._paths.required_folders("root"))
        project_creator.create_version_file(
            self._paths.version_path, version)
        sys.stdout.write("Created folder \"%s\" and required subfolders.\n" % (
            self._args.project_path))

    def get_input(self):
        """Download required files from website."""
        print("Running get input files")
        if self._args.ftp_path is None:
            print("Error: Please assign the path for downloading the data!")
            sys.exit()
        self.helper.check_make_folder(self._paths.ref_annotation_folder)
        self.helper.check_make_folder(self._paths.ref_fasta_folder)
        if self._args.ref_gff is True:
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "gff")
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "_genomic.gff.gz")
        if self._args.ref_fasta is True:
            get_file(self._args.ftp_path, self._paths.ref_fasta_folder,
                     "fna")
            get_file(self._args.ftp_path, self._paths.ref_fasta_folder,
                     "_genomic.fna.gz")
        if self._args.ref_gbk is True:
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "gbk")
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "gbff")
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "_genomic.gbff.gz")
        if self._args.ref_ptt is True:
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "ptt")
        if self._args.ref_rnt is True:
            get_file(self._args.ftp_path, self._paths.ref_annotation_folder,
                     "rnt")
        if self._args.convert_embl is True:
            annotation_files = os.listdir(self._paths.ref_annotation_folder)
            if len(annotation_files) == 0:
                sys.stdout.write("No gff files!!\n")
            else:
                Converter().convert_gbk2embl(self._paths.ref_annotation_folder)

    def get_target_fasta(self):
        """Get target fasta"""
        print("Running update genome fasta")
        self.check_multi_files([self._args.related_fasta_files],
                               ["--related_fasta_files"])
        # The label must be a list, parallel to the list of files.
        self.check_file([self._args.mutation_table], ["--mutation_table"],
                        True)
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        target = TargetFasta(self._paths.tar_fasta_folder,
                             self._args.related_fasta_files)
        target.get_target_fasta(
            self._args.mutation_table, self._paths.tar_fasta_folder,
            self._args.related_fasta_files,
            self._args.combine_to_one_fasta,
            self._paths.target_base_folder)

    def ratt(self):
        """Run RATT to transfer annotation file from reference to target."""
        print("Running annotation transfer")
        transfer_types = (
            "Strain", "Assembly", "Species", "Assembly.Repetitive",
            "Strain.Repetitive", "Species.Repetitive", "Multiple", "Free")
        if self._args.transfer_type not in transfer_types:
            print("Error: please assign correct --transfer_type!")
            sys.exit()
        if (self._args.related_embl_files is None) and (
                self._args.related_gbk_files is None):
            print("Error: please assign proper embl or genbank files")
            sys.exit()
        elif (self._args.related_embl_files is not None) and (
                self._args.related_gbk_files is not None):
            print("Error: please choose embl as input or genbank as input")
            sys.exit()
        self._args.ratt_path = self.check_execute_file(self._args.ratt_path)
        self.check_multi_files(
            [self._args.target_fasta_files, self._args.related_fasta_files],
            ["--target_fasta_files", "--related_fasta_files"])
        self.check_parameter([self._args.element, self._args.compare_pair],
                             ["--element", "--compare_pair"])
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        project_creator.create_subfolders(
            self._paths.required_folders("annotation_transfer"))
        args_ratt = self.args_container.container_ratt(
            self._args.ratt_path, self._args.element,
            self._args.transfer_type, self._args.related_embl_files,
            self._args.related_gbk_files, self._args.target_fasta_files,
            self._args.related_fasta_files, self._paths.ratt_folder,
            self._args.convert_to_gff_rnt_ptt,
            self._paths.tar_annotation_folder, self._args.compare_pair)
        ratt = RATT(args_ratt)
        ratt.annotation_transfer(args_ratt)

    def tsspredator(self):
        """Run TSSpredator for predicting TSS candidates."""
        self.check_multi_files(
            [self._args.fasta_files, self._args.annotation_files,
             self._args.compare_overlap_gff, self._args.manual_files,
             self._args.compare_transcript_files],
            ["--fasta_files", "--annotation_files", "--compare_overlap_gff",
             "--manual_files", "--compare_transcript_files"])
        self.check_parameter([self._args.tex_notex_libs,
                              self._args.condition_names],
                             ["--tex_notex_libs", "--condition_names"])
        self._args.tsspredator_path = self.check_execute_file(
            self._args.tsspredator_path)
        if self._args.program.lower() == "tss":
            print("Running TSS prediction")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif self._args.program.lower() == "ps":
            print("Running processing site prediction")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error: No such program!")
            sys.exit()
        args_tss = self.args_container.container_tsspredator(
            self._args.tsspredator_path, self._args.program,
            self._args.fasta_files, self._args.annotation_files,
            self._args.tex_notex_libs, self._args.condition_names,
            self._args.height, self._args.height_reduction,
            self._args.factor, self._args.factor_reduction,
            self._args.base_height, self._args.enrichment_factor,
            self._args.processing_factor, self._args.replicate_tex,
            out_folder, self._args.validate_gene,
            self._args.manual_files, self._args.curated_sequence_length,
            self._args.compare_transcript_files, self._args.tolerance,
            self._args.utr_length, self._args.cluster,
            self._args.re_check_orphan,
            self._args.remove_overlap_feature,
            self._args.compare_overlap_gff,
            self._args.remove_low_expression)
        tsspredator = TSSpredator(args_tss)
        tsspredator.run_tsspredator(args_tss)

    def optimize(self):
        """Optimize TSSpredator"""
        self.check_multi_files(
            [self._args.fasta_files, self._args.annotation_files,
             self._args.manual_files],
            ["--fasta_files", "--annotation_files", "--manual_files"])
        self._args.tsspredator_path = self.check_execute_file(
            self._args.tsspredator_path)
        self.check_parameter([self._args.tex_notex_libs,
                              self._args.condition_names],
                             ["--tex_notex_libs", "--condition_names"])
        if self._args.program.lower() == "tss":
            print("Running optimization of TSS prediction")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif self._args.program.lower() == "ps":
            print("Running optimization of processing site prediction")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error: No such program!")
            sys.exit()
        args_ops = self.args_container.container_optimize(
            self._args.tsspredator_path, self._args.fasta_files,
            self._args.annotation_files,
            self._args.manual_files, out_folder, self._args.max_height,
            self._args.max_height_reduction, self._args.max_factor,
            self._args.max_factor_reduction, self._args.max_base_height,
            self._args.max_enrichment_factor,
            self._args.max_processing_factor, self._args.utr_length,
            self._args.tex_notex_libs, self._args.condition_names,
            self._args.cluster, self._args.curated_sequence_length,
            self._args.parallels, self._args.program,
            self._args.replicate_tex, self._args.steps)
        optimize_tss(args_ops)

    def color(self):
        """color the screenshots"""
        print("Running png files coloring")
        self.check_parameter([self._args.track_number], ["--track_number"])
        self.check_folder([self._args.screenshot_folder],
                          ["--screenshot_folder"])
        self._args.imagemagick_covert_path = self.check_execute_file(
            self._args.imagemagick_covert_path)
        color = ColorPNG()
        color.generate_color_png(
            self._args.track_number, self._args.screenshot_folder,
            self._args.imagemagick_covert_path)

    def terminator(self):
        """Run TransTermHP and Gene converaged for detecting terminators"""
        print("Running terminator prediction")
        if self._args.transterm_path is None:
            print("Please assign the path of transterm in TransTermHP.")
            # Continuing without the path can only fail further down.
            sys.exit()
        self.check_multi_files(
            [self._args.fasta_files, self._args.annotation_files,
             self._args.transcript_files, self._args.srna_files],
            ["--fasta_files", "--annotation_files",
             "--transcript_files", "--srna_files"])
        self._resolve_executables(
            "transterm_path", "expterm_path", "rnafold_path")
        project_creator.create_subfolders(
            self._paths.required_folders("terminator"))
        args_term = self.args_container.container_terminator(
            self._args.transterm_path, self._args.expterm_path,
            self._args.rnafold_path,
            self._paths.transterm_folder, self._args.fasta_files,
            self._args.annotation_files, self._args.transcript_files,
            self._args.srna_files, self._args.decrease,
            self._args.highest_coverage, self._args.tolerance_detect_coverage,
            self._args.tolerance_within_transcript,
            self._args.tolerance_downstream_transcript,
            self._args.tolerance_within_gene,
            self._args.tolerance_downstream_gene,
            self._paths.transtermhp_folder,
            self._args.tex_notex_libs, self._args.frag_libs,
            self._args.tex_notex, self._args.replicate_tex,
            self._args.replicate_frag, self._args.table_best,
            self._args.min_loop_length, self._args.max_loop_length,
            self._args.min_stem_length, self._args.max_stem_length,
            self._args.min_u_tail, self._args.miss_rate,
            self._args.mutation_u_tail, self._args.keep_multi_term,
            self._args.window_size, self._args.window_shift)
        terminator = Terminator(args_term)
        terminator.run_terminator(args_term)

    def transcript(self):
        """Run Transcript detection"""
        print("Running transcript detection")
        self.check_multi_files(
            [self._args.annotation_files, self._args.tss_files,
             self._args.terminator_files],
            ["--annotation_files", "--tss_files", "--terminator_files"])
        project_creator.create_subfolders(
            self._paths.required_folders("transcript"))
        args_tran = self.args_container.container_transcript(
            self._args.tex_notex, self._args.modify_transcript,
            self._args.length, self._args.annotation_files,
            self._args.height, self._args.width,
            self._args.tolerance, self._args.tolerance_coverage,
            self._args.replicate_tex, self._args.replicate_frag,
            self._paths.transcript_output_folder,
            self._args.tss_files, self._args.tss_tolerance,
            self._args.tex_notex_libs, self._args.frag_libs,
            self._args.compare_feature_genome,
            self._args.table_best, self._args.terminator_files,
            self._args.terminator_tolerance,
            self._args.max_length_distribution)
        transcript = TranscriptDetection(args_tran)
        transcript.run_transcript(args_tran)

    def utr_detection(self):
        """Run UTR detection."""
        print("Running UTR detection")
        self.check_multi_files(
            [self._args.annotation_files, self._args.terminator_files,
             self._args.transcript_files, self._args.tss_files],
            ["--annotation_files", "--terminator_files",
             "--transcript_files", "--tss_files"])
        project_creator.create_subfolders(self._paths.required_folders("utr"))
        args_utr = self.args_container.container_utr(
            self._args.tss_files, self._args.annotation_files,
            self._args.transcript_files, self._args.terminator_files,
            self._args.terminator_tolerance, self._paths.utr_folder,
            self._args.tss_source, self._args.base_5utr,
            self._args.utr_length, self._args.base_3utr,
            self._args.tolerance_3utr, self._args.tolerance_5utr)
        utr = UTRDetection(args_utr)
        utr.run_utr_detection(args_utr)

    def _check_filter_input(self, files, info, filters):
        '''Exit with an error if the input needed by a --filter_info
        entry was not provided (is None).'''
        if files is None:
            print("Error: The {0} has to be provided "
                  "if \"{1}\" in --filter_info!".format(info, filters))
            sys.exit()

    def _check_database(self, database, flag, info):
        '''Exit with an error unless the database is assigned and either
        the path itself or the path plus ".fa"/".fna"/".fasta" is a file.'''
        if database is None:
            wrong = True
        elif os.path.isfile(database):
            wrong = False
        else:
            wrong = not any(os.path.isfile(database + ext)
                            for ext in (".fa", ".fna", ".fasta"))
        if wrong:
            print("Error: {0} is required if {1} is in --filter_info. "
                  "But the assignment of {0} is empty or wrong. "
                  "Please check the {0} or remove {1} from "
                  "--filter_info!".format(flag, info))
            sys.exit()

    def srna_detection(self):
        """sRNA_detection."""
        print("Running sRNA prediction")
        self.check_multi_files(
            [self._args.annotation_files, self._args.transcript_files,
             self._args.fasta_files, self._args.sorf_files,
             self._args.terminator_files, self._args.promoter_tables,
             self._args.processing_site_files],
            ["--annotation_files", "--transcript_files",
             "--fasta_files", "--sorf_files", "--terminator_files",
             "--promoter_tables", "--processing_site_files"])
        # Every filter needs its own inputs and/or external programs.
        for info in self._args.filter_info:
            if "sec_str" == info:
                if not self._args.compute_sec_structures:
                    print("Error: --compute_sec_structures is not switch on, "
                          "but sec_str is still in --filter_info.")
                    sys.exit()
                self._check_filter_input(
                    self._args.fasta_files, "fasta file", "sec_str")
                self._resolve_executables(
                    "rnafold_path", "relplot_path", "mountain_path")
            elif ("blast_nr" == info) or (
                    "blast_srna" == info):
                self._resolve_executables(
                    "blastn_path", "blastx_path", "makeblastdb_path")
                if ("blast_nr" == info):
                    self._check_database(self._args.nr_database_path,
                                         "--nr_database_path", "blast_nr")
                if ("blast_srna" == info):
                    self._check_database(self._args.srna_database_path,
                                         "--srna_database_path", "blast_srna")
            elif "sorf" == info:
                self._check_filter_input(
                    self._args.sorf_files, "sORF", "sorf")
            elif "term" == info:
                self._check_filter_input(self._args.terminator_files,
                                         "terminator", "term")
            elif "promoter" == info:
                self._check_filter_input(self._args.promoter_tables,
                                         "Promoter", "promoter")
            elif "tss" == info:
                self._check_filter_input(self._args.tss_files,
                                         "TSS", "tss")
            else:
                if "none" != info.lower():
                    print("Error: Please check the --filter_info, "
                          "invalid value was assigned!")
                    sys.exit()
        if self._args.utr_derived_srna:
            if self._args.tss_files is None:
                print("Error: The TSS has to be provided "
                      "if you want to compute UTR-derived sRNA!")
                sys.exit()
        if self._args.search_poly_u != 0:
            if self._args.fasta_files is None:
                print("Error: The fasta files have to be provided "
                      "if you want to extend 3'end of sRNA by "
                      "searching poly U tail!")
                sys.exit()
        project_creator.create_subfolders(self._paths.required_folders("srna"))
        args_srna = self.args_container.container_srna(
            self._args.rnafold_path, self._args.relplot_path,
            self._args.mountain_path, self._args.blastn_path,
            self._args.blastx_path, self._args.makeblastdb_path,
            self._paths.srna_folder, self._args.utr_derived_srna,
            self._args.annotation_files, self._args.tss_files,
            self._args.transcript_files,
            self._args.tss_intergenic_antisense_tolerance,
            self._args.tss_5utr_tolerance, self._args.tss_3utr_tolerance,
            self._args.tss_intercds_tolerance, self._args.filter_info,
            self._args.processing_site_files, self._args.fasta_files,
            self._args.mountain_plot, self._args.nr_format,
            self._args.srna_format, self._args.srna_database_path,
            self._args.nr_database_path, self._args.cutoff_energy,
            self._args.parallel_blast,
            self._args.min_intergenic_tex_coverage,
            self._args.min_intergenic_notex_coverage,
            self._args.min_intergenic_fragmented_coverage,
            self._args.min_complete_5utr_transcript_coverage,
            self._args.min_antisense_tex_coverage,
            self._args.min_antisense_notex_coverage,
            self._args.min_antisense_fragmented_coverage,
            self._args.min_utr_tex_coverage,
            self._args.min_utr_notex_coverage,
            self._args.min_utr_fragmented_coverage,
            self._args.max_length, self._args.min_length,
            self._args.tex_notex_libs, self._args.frag_libs,
            self._args.replicate_tex, self._args.replicate_frag,
            self._args.tex_notex, self._args.blast_e_nr,
            self._args.blast_e_srna, self._args.detect_srna_in_cds,
            self._args.table_best, self._args.decrease_intergenic_antisense,
            self._args.decrease_utr, self._args.tolerance_intergenic_antisense,
            self._args.tolerance_utr, self._args.cutoff_nr_hit,
            self._args.sorf_files, self._args.overlap_percent_cds,
            self._args.terminator_files,
            self._args.terminator_tolerance_in_srna,
            self._args.terminator_tolerance_out_srna,
            self._args.ignore_hypothetical_protein, self._args.tss_source,
            self._args.min_all_utr_coverage, self._args.promoter_tables,
            self._args.ranking_time_promoter, self._args.promoter_names,
            self._args.compute_sec_structures, self._args.search_poly_u,
            self._args.min_u_poly_u, self._args.mutation_poly_u)
        srna = sRNADetection(args_srna)
        srna.run_srna_detection(args_srna)

    def sorf_detection(self):
        """sORF_detection."""
        print("Running sORF prediction")
        self.check_multi_files(
            [self._args.transcript_files, self._args.annotation_files,
             self._args.fasta_files, self._args.srna_files,
             self._args.tss_files],
            ["--transcript_files", "--annotation_files",
             "--fasta_files", "--srna_files", "--tss_files"])
        project_creator.create_subfolders(
            self._paths.required_folders("sorf"))
        args_sorf = self.args_container.container_sorf(
            self._paths.sorf_folder, self._args.utr_derived_sorf,
            self._args.transcript_files,
            self._args.annotation_files,
            self._args.tss_files, self._args.utr_length,
            self._args.min_length, self._args.max_length,
            self._args.cutoff_intergenic_coverage,
            self._args.cutoff_antisense_coverage,
            self._args.cutoff_5utr_coverage,
            self._args.cutoff_3utr_coverage,
            self._args.cutoff_intercds_coverage,
            self._args.fasta_files, self._args.tex_notex_libs,
            self._args.frag_libs, self._args.tex_notex,
            self._args.replicate_tex, self._args.replicate_frag,
            self._args.table_best, self._args.srna_files,
            self._args.start_codon, self._args.stop_codon,
            self._args.cutoff_base_coverage, self._args.tolerance_rbs,
            self._args.rbs_not_after_tss, self._args.print_all_combination,
            self._args.best_no_srna, self._args.best_no_tss,
            self._args.ignore_hypothetical_protein,
            self._args.min_rbs_distance, self._args.max_rbs_distance,
            self._args.tolerance_3end, self._args.tolerance_5end)
        sorf = sORFDetection(args_sorf)
        sorf.run_sorf_detection(args_sorf)

    def meme(self):
        """promoter detection"""
        print("Running promoter detection")
        self.check_multi_files(
            [self._args.tss_files, self._args.fasta_files],
            ["--tss_files", "--fasta_files"])
        if not self._args.tss_source:
            self.check_multi_files([self._args.annotation_files],
                                   ["--annotation_files"])
        # Two independent checks: with "both", each program is validated.
        if self._args.program in ("both", "meme"):
            self._args.meme_path = self.check_execute_file(
                self._args.meme_path)
        if self._args.program in ("both", "glam2"):
            self._args.glam2_path = self.check_execute_file(
                self._args.glam2_path)
        project_creator.create_subfolders(
            self._paths.required_folders("promoter"))
        args_pro = self.args_container.container_promoter(
            self._args.meme_path, self._args.glam2_path,
            self._paths.promoter_output_folder, self._args.tex_libs,
            self._args.tss_files, self._args.fasta_files,
            self._args.num_motifs, self._args.nt_before_tss,
            self._args.motif_width, self._args.tss_source,
            self._args.annotation_files, self._args.end_run,
            self._args.combine_all, self._args.e_value,
            self._args.parallels, self._args.program)
        meme = MEME(args_pro)
        meme.run_meme(args_pro)

    def operon(self):
        """operon detection"""
        print("Running operon detection")
        self.check_multi_files(
            [self._args.tss_files, self._args.annotation_files,
             self._args.transcript_files, self._args.utr5_files,
             self._args.utr3_files, self._args.terminator_files],
            ["--tss_files", "--annotation_files",
             "--transcript_files", "--utr5_files",
             "--utr3_files", "--terminator_files"])
        project_creator.create_subfolders(
            self._paths.required_folders("operon"))
        args_op = self.args_container.container_operon(
            self._args.tss_files, self._args.annotation_files,
            self._args.transcript_files, self._args.utr5_files,
            self._args.utr3_files, self._args.terminator_files,
            self._args.tss_tolerance, self._args.terminator_tolerance,
            self._args.min_length, self._paths.operon_output_folder,
            self._paths.operon_statistics_folder)
        operon = OperonDetection(args_op)
        operon.run_operon(args_op)

    def circrna(self):
        """circRNA detection"""
        print("Running circular RNA prediction")
        if self._args.read_files:
            self._args.segemehl_path = self.check_execute_file(
                self._args.segemehl_path)
        self._resolve_executables("testrealign_path", "samtools_path")
        self.check_multi_files(
            [self._args.fasta_files, self._args.annotation_files],
            ["--fasta_files", "--annotation_files"])
        project_creator.create_subfolders(
            self._paths.required_folders("circrna"))
        args_circ = self.args_container.container_circrna(
            self._args.parallels, self._args.fasta_files,
            self._args.annotation_files, self._args.bam_files,
            self._args.read_files, self._paths.circrna_stat_folder,
            self._args.support_reads, self._args.segemehl_path,
            self._args.testrealign_path, self._args.samtools_path,
            self._args.start_ratio, self._args.end_ratio,
            self._args.ignore_hypothetical_protein,
            self._paths.circrna_output_folder)
        circ = CircRNADetection(args_circ)
        circ.run_circrna(args_circ)

    def goterm(self):
        """Go term discovery"""
        print("Running GO term mapping")
        self.check_multi_files(
            [self._args.annotation_files, self._args.transcript_files],
            ["--annotation_files", "--transcript_files"])
        self.check_file([self._args.uniprot_id, self._args.go_obo,
                         self._args.goslim_obo],
                        ["--uniprot_id", "--go_obo", "--goslim_obo"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("go_term"))
        args_go = self.args_container.container_goterm(
            self._args.annotation_files,
            self._paths.goterm_output_folder, self._args.uniprot_id,
            self._args.go_obo, self._args.goslim_obo,
            self._args.transcript_files)
        goterm = GoTermFinding(args_go)
        goterm.run_go_term(args_go)

    def srna_target(self):
        """sRNA target prediction"""
        print("Running sRNA target prediction")
        self.check_multi_files(
            [self._args.fasta_files, self._args.srna_files,
             self._args.annotation_files],
            ["--fasta_files", "--srna_files", "--annotation_files"])
        if "RNAup" in self._args.program:
            self._args.rnaup_path = self.check_execute_file(
                self._args.rnaup_path)
        if "RNAplex" in self._args.program:
            self._resolve_executables("rnaplfold_path", "rnaplex_path")
        if "IntaRNA" in self._args.program:
            self._args.intarna_path = self.check_execute_file(
                self._args.intarna_path)
            if self._args.mode_intarna is None:
                print("Error: --mode_IntaRNA need to be assigned!")
                sys.exit()
        project_creator.create_subfolders(
            self._paths.required_folders("srna_target"))
        args_tar = self.args_container.container_srna_target(
            self._args.rnaplfold_path, self._args.rnaplex_path,
            self._args.rnaup_path, self._args.intarna_path,
            self._args.annotation_files,
            self._args.fasta_files, self._args.srna_files,
            self._args.query_srnas, self._args.program,
            self._args.interaction_length,
            self._args.window_size_target_rnaplex,
            self._args.span_target_rnaplex,
            self._args.window_size_srna_rnaplfold,
            self._args.span_srna_rnaplfold,
            self._args.unstructured_region_rnaplex_target,
            self._args.unstructured_region_rnaplex_srna,
            self._args.unstructured_region_rnaup,
            self._args.energy_threshold_rnaplex,
            self._args.duplex_distance_rnaplex, self._args.top,
            self._paths.starget_output_folder, self._args.parallels_rnaplex,
            self._args.parallels_rnaup, self._args.parallels_intarna,
            self._args.continue_rnaup,
            self._args.slide_window_size_srna_intarna,
            self._args.max_loop_length_srna_intarna,
            self._args.slide_window_size_target_intarna,
            self._args.max_loop_length_target_intarna,
            self._args.mode_intarna, self._args.potential_target_start,
            self._args.potential_target_end, self._args.target_feature)
        srnatarget = sRNATargetPrediction(args_tar)
        srnatarget.run_srna_target_prediction(args_tar)

    def snp(self):
        """SNP transcript detection"""
        print("Running SNP/mutations calling")
        self.check_multi_files(
            [self._args.fasta_files],
            ["--fasta_files"])
        if self._args.bam_type not in ("related_genome", "reference_genome"):
            print("Error: Please assign \"related_genome\" or"
                  " \"reference_genome\" to --bam_type!")
            sys.exit()
        if self._args.ploidy not in ("haploid", "diploid"):
            print("Error: Please assign \"haploid\" or"
                  " \"diploid\" to --chromosome_type!")
            sys.exit()
        if self._args.caller not in ("c", "m"):
            print("Error: Please assign \"c\" or"
                  " \"m\" to --caller!")
            sys.exit()
        self._resolve_executables("bcftools_path", "samtools_path")
        project_creator.create_subfolders(self._paths.required_folders("snp"))
        args_snp = self.args_container.container_snp(
            self._args.samtools_path, self._args.bcftools_path,
            self._args.bam_type,
            self._args.program, self._args.fasta_files,
            self._args.bam_files,
            self._args.quality, self._args.read_depth_range,
            self._paths.snp_output_folder, self._args.indel_fraction,
            self._args.ploidy, self._args.rg_tag, self._args.caller,
            self._args.filter_tag_info, self._args.dp4_cutoff)
        snp = SNPCalling(args_snp)
        snp.run_snp_calling(args_snp)

    def ppi(self):
        """PPI network retrieve"""
        print("Running protein-protein interaction networks prediction")
        self.check_multi_files([self._args.annotation_files],
                               ["--annotation_files"])
        self.check_parameter([self._args.query_strains,
                              self._args.species_string],
                             ["--query_strains", "--species_string"])
        project_creator.create_subfolders(
            self._paths.required_folders("ppi_network"))
        args_ppi = self.args_container.container_ppi(
            self._args.annotation_files, self._args.query_strains,
            self._args.without_strain_pubmed, self._args.species_string,
            self._args.score, self._paths.ppi_output_folder,
            self._args.node_size, self._args.query)
        ppi = PPINetwork(self._paths.ppi_output_folder)
        ppi.retrieve_ppi_network(args_ppi)

    def sublocal(self):
        """Subcellular Localization prediction"""
        print("Running subcellular localization prediction")
        self.check_multi_files(
            [self._args.annotation_files, self._args.fasta_files,
             self._args.transcript_files],
            ["--annotation_files", "--fasta_files",
             "--transcript_files"])
        if self._args.bacteria_type not in ("positive", "negative"):
            print("Error: Please assign \"positive\" or"
                  " \"negative\" to --bacteria_type!")
            sys.exit()
        self._args.psortb_path = self.check_execute_file(
            self._args.psortb_path)
        project_creator.create_subfolders(
            self._paths.required_folders("subcellular_localization"))
        args_sub = self.args_container.container_sublocal(
            self._args.psortb_path, self._args.annotation_files,
            self._args.fasta_files, self._args.bacteria_type,
            self._args.difference_multi,
            self._paths.sublocal_output_folder,
            self._args.transcript_files)
        sublocal = SubLocal(args_sub)
        sublocal.run_sub_local(args_sub)

    def ribos(self):
        """riboswitch and RNA thermometer prediction"""
        print("Running riboswitch and RNA thermometer prediction")
        self.check_multi_files(
            [self._args.annotation_files, self._args.fasta_files,
             self._args.tss_files, self._args.transcript_files],
            ["--annotation_files", "--fasta_files", "--tss_files",
             "--transcript_files"])
        if (self._args.program == "both"):
            self.check_file([self._args.riboswitch_id_file,
                             self._args.rfam_path],
                            ["--riboswitch_id_file", "--rfam_path"], True)
            self.check_file([self._args.rna_thermometer_id_file,
                             self._args.rfam_path],
                            ["--rna_thermometer_id_file", "--rfam_path"],
                            True)
            project_creator.create_subfolders(
                self._paths.required_folders("riboswitch"))
            project_creator.create_subfolders(
                self._paths.required_folders("thermometer"))
            ribos_path = self._paths.ribos_output_folder
            thermo_path = self._paths.thermo_output_folder
        elif (self._args.program == "thermometer"):
            self.check_file([self._args.rna_thermometer_id_file,
                             self._args.rfam_path],
                            ["--rna_thermometer_id_file", "--rfam_path"],
                            True)
            project_creator.create_subfolders(
                self._paths.required_folders("thermometer"))
            ribos_path = None
            thermo_path = self._paths.thermo_output_folder
        elif (self._args.program == "riboswitch"):
            self.check_file([self._args.riboswitch_id_file,
                             self._args.rfam_path],
                            ["--riboswitch_id_file", "--rfam_path"], True)
            project_creator.create_subfolders(
                self._paths.required_folders("riboswitch"))
            ribos_path = self._paths.ribos_output_folder
            thermo_path = None
        else:
            print("Error: Please assign \"thermometer\", \"riboswitch\" "
                  "or \"both\" in --program!")
            sys.exit()
        self._args.cmscan_path = self.check_execute_file(
            self._args.cmscan_path)
        self._args.cmpress_path = self.check_execute_file(
            self._args.cmpress_path)
        args_ribo = self.args_container.container_ribos(
            self._args.program, self._args.rna_thermometer_id_file,
            self._args.cmscan_path, self._args.cmpress_path,
            self._args.riboswitch_id_file,
            self._args.annotation_files, self._args.fasta_files,
            self._args.tss_files, self._args.transcript_files,
            self._args.rfam_path, ribos_path,
            thermo_path, self._args.e_value, self._args.output_all,
            self._paths.database_folder, self._args.tolerance,
            self._args.tolerance_rbs, self._args.utr_length)
        ribos = Ribos(args_ribo)
        ribos.run_ribos(args_ribo)

    def crispr(self):
        """CRISPR prediction"""
        print("Running CRISPR prediction")
        self.check_multi_files(
            [self._args.fasta_files, self._args.annotation_files],
            ["--fasta_files", "--annotation_files"])
        self._args.crt_path = self.check_execute_file(self._args.crt_path)
        project_creator.create_subfolders(
            self._paths.required_folders("crispr"))
        args_cris = self.args_container.container_cris(
            self._args.fasta_files, self._args.annotation_files,
            self._args.crt_path, self._args.window_size,
            self._args.min_number_repeats, self._args.min_length_repeat,
            self._args.Max_length_repeat, self._args.min_length_spacer,
            self._args.Max_length_spacer, self._paths.crispr_output_folder,
            self._args.ignore_hypothetical_protein)
        cris = Crispr(args_cris)
        cris.run_crispr(args_cris)

    def merge(self):
        """Merge all features"""
        print("Merging all features to one gff file")
        merge_folder = os.path.join(self._paths.output_folder,
                                    "merge_all_features")
        self.helper.check_make_folder(merge_folder)
        # other_features_files may be unset; keep one label per file so
        # that a wrong path is reported instead of raising an IndexError.
        other_features = list(self._args.other_features_files or [])
        self.check_file(
            [self._args.transcript_file] + other_features,
            ["--transcript_file"] +
            ["--other_features_files"] * len(other_features),
            False)
        self.check_parameter([self._args.output_prefix], ["--output_prefix"])
        run_merge(merge_folder, self._args.transcript_file,
                  self._args.other_features_files,
                  self._args.terminator_tolerance, self._args.tss_tolerance,
                  os.path.join(merge_folder, self._args.output_prefix))

    def screen(self):
        """generate screenshot"""
        print("Running screenshot generation")
        self.check_file([self._args.main_gff, self._args.fasta_file],
                        ["--main_gff", "--fasta_file"], True)
        if self._args.side_gffs is not None:
            for gff in self._args.side_gffs:
                gff = gff.strip()
                if not os.path.isfile(gff):
                    print("Error: The --side_gffs do not exist!")
                    sys.exit()
        if self._args.output_folder is None:
            print("Error: Please assign --output_folder!")
            sys.exit()
        if self._args.present not in ("expand", "collapse", "squish"):
            print("Error: Please assign \"expand\" or "
                  "\"collapse\" or \"squish\" to --present!")
            sys.exit()
        args_sc = self.args_container.container_screen(
            self._args.main_gff, self._args.side_gffs,
            self._args.fasta_file, self._args.height,
            self._args.tex_notex_libs, self._args.frag_libs,
            self._args.present, self._args.output_folder)
        screen = Screen(args_sc)
        screen.screenshot(args_sc)
class sRNADetection(object):
    '''detection of sRNA

    Drives the whole sRNA workflow: input validation, intergenic /
    antisense / UTR-derived detection per genome, optional filters
    (secondary structure, BLAST, sORF comparison), table generation,
    classification and clean-up.  Intermediate files are written into
    ``args_srna.out_folder`` with a ``tmp_`` prefix.
    '''

    def __init__(self, args_srna):
        '''Set up helper objects and all output / temporary paths.

        ``args_srna`` is the option container of the sRNA subcommand;
        only its folder attributes are read here.
        '''
        self.args_container = ArgsContainer()
        self.helper = Helper()
        self.multiparser = Multiparser()
        out_folder = args_srna.out_folder
        self.gff_output = os.path.join(out_folder, "gffs")
        self.table_output = os.path.join(out_folder, "tables")
        self.stat_path = os.path.join(out_folder, "statistics")
        self.tss_path = self._check_folder_exist(args_srna.tss_folder)
        self.pro_path = self._check_folder_exist(args_srna.pro_folder)
        self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
        # fastas / trans may be None; joining None would raise a TypeError
        # here, before _check_necessary_file can print its error message.
        self.fasta_path = self._check_folder_exist(args_srna.fastas)
        self.tran_path = self._check_folder_exist(args_srna.trans)
        self.term_path = self._check_folder_exist(args_srna.terms)
        self.merge_wigs = os.path.join(out_folder, "merge_wigs")
        # Path prefixes of the per-genome temporary files; the genome
        # prefix is appended with "_" by the methods below.
        self.prefixs = {
            "merge": os.path.join(out_folder, "tmp_merge"),
            "utr": os.path.join(out_folder, "tmp_utrsrna"),
            "normal": os.path.join(out_folder, "tmp_normal"),
            "in_cds": os.path.join(out_folder, "tmp_incds"),
            "merge_table": os.path.join(out_folder, "tmp_merge_table"),
            "utr_table": os.path.join(out_folder, "tmp_utrsrna_table"),
            "normal_table": os.path.join(out_folder, "tmp_normal_table"),
            "in_cds_table": os.path.join(out_folder, "tmp_incds_table"),
            "basic": os.path.join(out_folder, "tmp_basic"),
            "energy": os.path.join(out_folder, "tmp_energy"),
        }
        self.tmps = {
            "nr": os.path.join(out_folder, "tmp_nr"),
            "srna": os.path.join(out_folder, "tmp_sRNA"),
        }
        self.best_table = os.path.join(self.table_output, "best")
        self.all_best = {
            "all_gff": os.path.join(self.gff_output, "all_candidates"),
            "best_gff": os.path.join(self.gff_output, "best"),
            "all_table": os.path.join(self.table_output, "all_candidates"),
            "best_table": os.path.join(self.table_output, "best"),
        }

    def _check_folder_exist(self, folder):
        '''Return ``<folder>/tmp``, or None when no folder was given.'''
        if folder is None:
            return None
        return os.path.join(folder, "tmp")

    def _check_gff(self, gffs):
        '''Run the attribute check on every ``.gff`` file in ``gffs``.'''
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_format(self, blast_path, database, type_, db_file, err):
        '''Call ``makeblastdb``; its stderr goes to the open file ``err``.'''
        call([
            os.path.join(blast_path, "makeblastdb"), "-in", database,
            "-dbtype", type_, "-out", db_file
        ], stderr=err)

    def _formatdb(self, database, type_, out_folder, blast_path,
                  database_type):
        '''Format ``database`` for BLAST.

        When ``database`` has no fasta extension, the fasta file with the
        same base name is looked up in its folder.  For the "sRNA"
        database the file is rewritten by ``change_format`` first.
        stderr of ``makeblastdb`` is written to ``<out_folder>/log.txt``.
        '''
        fasta_exts = (".fa", ".fna", ".fasta")
        # "with" guarantees the log handle is closed even if formatting
        # fails half-way.
        with open(os.path.join(out_folder, "log.txt"), "w") as err:
            if not database.endswith(fasta_exts):
                folders = database.split("/")
                filename = folders[-1]
                folder = "/".join(folders[:-1])
                for fasta in os.listdir(folder):
                    if fasta.endswith(fasta_exts):
                        if ".".join(fasta.split(".")[:-1]) == filename:
                            database = os.path.join(folder, fasta)
            if database_type == "sRNA":
                change_format(database, "tmp_srna_database")
                os.remove(database)
                shutil.move("tmp_srna_database", database)
            db_file = ".".join(database.split(".")[:-1])
            self._run_format(blast_path, database, type_, db_file, err)

    def _merge_frag_tex_file(self, files, args_srna):
        '''merge the results of fragmented and tex treated libs

        ``files`` maps "frag_gff", "frag_csv", "tex_gff", "tex_csv",
        "merge_gff" and "merge_csv" to paths.  The merged table / gff end
        up at the "merge_*" paths.
        '''
        has_frag = args_srna.frag_wigs is not None
        has_tex = args_srna.tex_wigs is not None
        if has_frag and has_tex:
            self.helper.merge_file(files["frag_gff"], files["tex_gff"])
            self.helper.merge_file(files["frag_csv"], files["tex_csv"])
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
            os.remove(files["frag_csv"])
            os.remove(files["frag_gff"])
            os.remove(files["tex_gff"])
        elif has_frag:
            shutil.move(files["frag_csv"], files["merge_csv"])
            self.helper.sort_gff(files["frag_gff"], files["merge_gff"])
            os.remove(files["frag_gff"])
        elif has_tex:
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])

    def _read_lib_wig(self, args_srna):
        '''Read libraries and wiggle files.

        Returns ``[libs, texs, wigs_f, wigs_r]``.
        '''
        libs, texs = read_libs(args_srna.input_libs, args_srna.wig_folder)
        wigs_f = read_wig(args_srna.wig_f_file, "+", libs)
        wigs_r = read_wig(args_srna.wig_r_file, "-", libs)
        return [libs, texs, wigs_f, wigs_r]

    def _run_normal(self, prefix, gff, tran, fuzzy_tss, args_srna):
        '''detection of intergenic and antisense sRNA

        Returns ``(tss, frag_datas, tex_datas)``; the data lists are the
        results of ``_read_lib_wig`` or None when that library type was
        not provided.
        '''
        tex_datas = None
        frag_datas = None
        if "tmp_cutoff_inter" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_cutoff_inter"))
        files = {
            "frag_gff": None,
            "frag_csv": None,
            "tex_gff": None,
            "tex_csv": None,
            "merge_gff": None,
            "merge_csv": None
        }
        if self.tss_path is not None:
            tss = self.helper.get_correct_file(self.tss_path, "_TSS.gff",
                                               prefix, None, None)
        else:
            tss = None
        if self.pro_path is not None:
            pro = self.helper.get_correct_file(
                self.pro_path, "_processing.gff", prefix, None, None)
        else:
            pro = None
        if args_srna.frag_wigs is not None:
            files["frag_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_frag", prefix]))
            files["frag_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_frag_table", prefix]))
            args_srna = self.args_container.container_intersrna(
                "frag", files, args_srna, prefix,
                os.path.join(args_srna.gffs, gff), tran, tss, pro, fuzzy_tss)
            frag_datas = self._read_lib_wig(args_srna)
            intergenic_srna(args_srna, frag_datas[0], frag_datas[1],
                            frag_datas[2], frag_datas[3])
        if args_srna.tex_wigs is not None:
            files["tex_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_tex", prefix]))
            files["tex_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_tex_table", prefix]))
            args_srna = self.args_container.container_intersrna(
                "tex", files, args_srna, prefix,
                os.path.join(args_srna.gffs, gff), tran, tss, pro, fuzzy_tss)
            tex_datas = self._read_lib_wig(args_srna)
            intergenic_srna(args_srna, tex_datas[0], tex_datas[1],
                            tex_datas[2], tex_datas[3])
        files["merge_csv"] = "_".join([self.prefixs["normal_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["normal"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        # A "TSS_class" folder in out_folder takes precedence over the
        # imported TSS file when tss_source is false.
        if ("TSS_class" in os.listdir(
                args_srna.out_folder)) and (not args_srna.tss_source):
            tss = os.path.join(args_srna.out_folder, "TSS_class",
                               prefix + "_TSS.gff")
        return tss, frag_datas, tex_datas

    def _run_utrsrna(self, gff, tran, prefix, tss, pro, args_srna,
                     frag_datas, tex_datas):
        '''detection of UTR-derived sRNA

        ``frag_datas`` / ``tex_datas`` are the lists returned by
        ``_read_lib_wig``; each is only used when the matching wig
        option is set.
        '''
        if "tmp_median" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
        files = {
            "frag_gff": None,
            "frag_csv": None,
            "tex_gff": None,
            "tex_csv": None,
            "merge_gff": None,
            "merge_csv": None
        }
        if args_srna.tex_wigs is not None:
            files["tex_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_tex", prefix]))
            files["tex_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_tex_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                os.path.join(args_srna.gffs, gff), tran, tss, files, pro,
                os.path.join(self.fasta_path, prefix + ".fa"), "tex", prefix,
                args_srna)
            utr_derived_srna(args_srna, tex_datas[0], tex_datas[1],
                             tex_datas[2], tex_datas[3])
        if args_srna.frag_wigs is not None:
            files["frag_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_frag", prefix]))
            files["frag_csv"] = os.path.join(
                args_srna.out_folder,
                "_".join(["tmp_utr_frag_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                os.path.join(args_srna.gffs, gff), tran, tss, files, pro,
                os.path.join(self.fasta_path, prefix + ".fa"), "frag", prefix,
                args_srna)
            utr_derived_srna(args_srna, frag_datas[0], frag_datas[1],
                             frag_datas[2], frag_datas[3])
        files["merge_csv"] = "_".join([self.prefixs["utr_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["utr"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        filter_utr(files["merge_gff"], files["merge_csv"], args_srna.min_utr)

    def _check_necessary_file(self, args_srna):
        '''Validate the required inputs and parse the provided folders.

        Exits via ``sys.exit`` when annotation, transcript or wiggle
        input is missing, when UTR detection is requested without TSS
        files, or when fasta files are needed but not given.
        '''
        if (args_srna.gffs is None) or (args_srna.trans is None) or (
                (args_srna.tex_wigs is None) and
                (args_srna.frag_wigs is None)):
            print("Error: lack required files!!!!")
            sys.exit()
        if args_srna.utr_srna:
            if (args_srna.tss_folder is None):
                print("Error: lack required TSS files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            if (args_srna.pro_folder is None):
                print("Warning: lack Processing site files for UTR "
                      "derived sRNA detection!!!")
                print("it may effect the results!!!!")
        self._check_gff(args_srna.gffs)
        self._check_gff(args_srna.trans)
        if args_srna.tss_folder is not None:
            self._check_gff(args_srna.tss_folder)
            self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
            self.multiparser.combine_gff(args_srna.gffs, self.tss_path, None,
                                         "TSS")
        if args_srna.pro_folder is not None:
            self._check_gff(args_srna.pro_folder)
            self.multiparser.parser_gff(args_srna.pro_folder, "processing")
            self.multiparser.combine_gff(args_srna.gffs, self.pro_path, None,
                                         "processing")
        if args_srna.sorf_file is not None:
            self._check_gff(args_srna.sorf_file)
            self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
            self.multiparser.combine_gff(args_srna.gffs, self.sorf_path, None,
                                         "sORF")
        if args_srna.import_info is not None:
            if args_srna.utr_srna or (
                    "sec_str" in args_srna.import_info) or (
                    args_srna.nr_database is not None) or (
                    args_srna.srna_database is not None):
                if args_srna.fastas is None:
                    print("Error: lack required fasta files for UTR "
                          "derived sRNA detection!!!!")
                    sys.exit()
                self.multiparser.parser_fasta(args_srna.fastas)
                self.multiparser.combine_fasta(args_srna.gffs,
                                               self.fasta_path, None)
        if args_srna.terms is not None:
            self._check_gff(args_srna.terms)
            self.multiparser.parser_gff(args_srna.terms, "term")
            self.multiparser.combine_gff(args_srna.gffs, self.term_path, None,
                                         "term")
        else:
            self.term_path = None

    def _merge_tex_frag_datas(self, tex_datas, frag_datas):
        '''Merge the wiggle dicts of ``frag_datas`` into ``tex_datas``.

        Only indexes 2 and 3 (forward / reverse wigs as returned by
        ``_read_lib_wig``) are merged, in place.  Returns ``tex_datas``,
        or ``frag_datas`` when no tex data exists.
        '''
        if (tex_datas is not None) and (frag_datas is not None):
            for index in [2, 3]:
                for strain, conds in frag_datas[index].items():
                    if strain not in tex_datas[index]:
                        tex_datas[index][strain] = conds
                    else:
                        for cond, tracks in conds.items():
                            tex_datas[index][strain][cond] = tracks
        elif (tex_datas is None) and (frag_datas is not None):
            tex_datas = frag_datas
        return tex_datas

    def _run_program(self, args_srna):
        '''Detect sRNAs for every ``.gff`` file in ``args_srna.gffs``.

        Returns the list of genome prefixes that were processed.
        '''
        prefixs = []
        tss = None
        for gff in os.listdir(args_srna.gffs):
            if gff.endswith(".gff"):
                prefix = gff.replace(".gff", "")
                prefixs.append(prefix)
                print("Running sRNA detection of {0}....".format(prefix))
                tran = self.helper.get_correct_file(
                    self.tran_path, "_transcript.gff", prefix, None, None)
                gffs = {
                    "merge": "_".join([self.prefixs["merge"], prefix]),
                    "utr": "_".join([self.prefixs["utr"], prefix]),
                    "normal": "_".join([self.prefixs["normal"], prefix])
                }
                csvs = {
                    "merge": "_".join([self.prefixs["merge_table"], prefix]),
                    "utr": "_".join([self.prefixs["utr_table"], prefix]),
                    "normal": "_".join(
                        [self.prefixs["normal_table"], prefix])
                }
                tss, frag_datas, tex_datas = self._run_normal(
                    prefix, gff, tran, args_srna.fuzzy_tsss["inter"],
                    args_srna)
                if args_srna.utr_srna:
                    print("Running UTR derived sRNA detection of {0}".format(
                        prefix))
                    if tss is None:
                        tss = self.helper.get_correct_file(
                            self.tss_path, "_TSS.gff", prefix, None, None)
                    if self.pro_path is not None:
                        pro = self.helper.get_correct_file(
                            self.pro_path, "_processing.gff", prefix, None,
                            None)
                    else:
                        pro = None
                    if tss is not None:
                        self._run_utrsrna(gff, tran, prefix, tss, pro,
                                          args_srna, frag_datas, tex_datas)
                tex_datas = self._merge_tex_frag_datas(tex_datas, frag_datas)
                # Drop the references to the wiggle data as early as
                # possible before the next genome is loaded.
                del frag_datas
                gc.collect()
                self._merge_srna(args_srna, gffs, csvs, prefix,
                                 os.path.join(args_srna.gffs, gff), tss,
                                 tex_datas)
                del tex_datas
                filter_frag(csvs["merge"], gffs["merge"])
                self.helper.sort_gff(
                    gffs["merge"], "_".join([self.prefixs["basic"], prefix]))
        return prefixs

    def _merge_srna(self, args_srna, gffs, csvs, prefix, gff_file, tss,
                    tex_datas):
        '''Merge the detected sRNA gff files and their tables.'''
        print("merging data of sRNA...")
        merge_srna_gff(gffs, args_srna.in_cds, args_srna.cutoff_overlap,
                       gff_file)
        merge_srna_table(gffs["merge"], csvs, tex_datas[2], tex_datas[3], tss,
                         args_srna)

    def _run_RNAfold(self, seq_file, vienna_path, sec_file):
        '''Pipe ``seq_file`` through ``RNAfold -p`` into ``sec_file``.'''
        # NOTE(review): the command is built as a shell string, so paths
        # containing spaces or shell metacharacters will break it.
        os.system(" ".join([
            "cat", seq_file, "|",
            os.path.join(vienna_path, "RNAfold"), "-p", ">", sec_file
        ]))

    def _get_seq_sec(self, fasta_path, out_folder, prefix, sec_path,
                     dot_path, vienna_path):
        '''extract the sec str energy

        Changes the working directory to ``<out_folder>/tmp_srna`` and
        leaves it there; the caller switches back using the returned
        "main" path.  Returns a dict with the keys "sec", "dot", "main"
        and "tmp".
        '''
        detect = False
        for fasta in os.listdir(fasta_path):
            if fasta.endswith(".fa") and (fasta.replace(".fa", "") == prefix):
                detect = True
                break
        if detect:
            detect = False
            seq_file = os.path.join(out_folder, "_".join(["sRNA_seq", prefix]))
            sec_file = os.path.join(out_folder, "_".join(["sRNA_2d", prefix]))
            self.helper.get_seq("_".join([self.prefixs["basic"], prefix]),
                                os.path.join(fasta_path, fasta), seq_file)
        else:
            print("Error:There is not fasta file of {0}".format(prefix))
            print("please check your imported information")
            sys.exit()
        tmp_path = os.path.join(out_folder, "tmp_srna")
        self.helper.check_make_folder(tmp_path)
        main_path = os.getcwd()
        os.chdir(tmp_path)
        # After chdir every path is anchored at the original working
        # directory.
        sec_file = os.path.join(main_path, sec_file)
        seq_file = os.path.join(main_path, seq_file)
        tmp_sec_path = os.path.join(main_path, sec_path)
        tmp_dot_path = os.path.join(main_path, dot_path)
        self._run_RNAfold(seq_file, vienna_path, sec_file)
        extract_energy(
            os.path.join(main_path,
                         "_".join([self.prefixs["basic"], prefix])),
            sec_file,
            os.path.join(main_path,
                         "_".join([self.prefixs["energy"], prefix])))
        # Replace "|" in the names of the files in the working folder.
        for ps in os.listdir(os.getcwd()):
            new_ps = ps.replace("|", "_")
            shutil.move(ps, new_ps)
        return {
            "sec": tmp_sec_path,
            "dot": tmp_dot_path,
            "main": main_path,
            "tmp": os.path.join(main_path, tmp_path)
        }

    def _run_replot(self, vienna_util, tmp_paths, file_, dot_file, rel_file):
        '''Run ``relplot.pl`` on a structure / dot plot pair.'''
        # NOTE(review): shell string again; see _run_RNAfold.
        os.system(" ".join([
            os.path.join(vienna_util, "relplot.pl"),
            os.path.join(tmp_paths["tmp"], file_),
            os.path.join(tmp_paths["tmp"], dot_file), ">",
            os.path.join(tmp_paths["tmp"], rel_file)
        ]))

    def _convert_pdf(self, ps2pdf14_path, tmp_paths, file_, pdf_file):
        '''Convert one postscript file of the tmp folder to pdf.'''
        call([ps2pdf14_path, os.path.join(tmp_paths["tmp"], file_), pdf_file])

    def _replot_sec_to_pdf(self, vienna_util, tmp_paths, ps2pdf14_path,
                           prefix):
        '''Replot the structure plots, convert them to pdf and move the
        pdf files into the per-genome "sec" and "dot" folders.'''
        for file_ in os.listdir(os.getcwd()):
            if file_.endswith("ss.ps"):
                dot_file = file_.replace("ss.ps", "dp.ps")
                rel_file = file_.replace("ss.ps", "rss.ps")
                print("replot {0}".format(file_))
                self._run_replot(vienna_util, tmp_paths, file_, dot_file,
                                 rel_file)
        for file_ in os.listdir(tmp_paths["tmp"]):
            if file_.endswith(("rss.ps", "dp.ps")):
                pdf_file = file_.replace(".ps", ".pdf")
                print("convert {0} to pdf".format(file_))
                self._convert_pdf(ps2pdf14_path, tmp_paths, file_, pdf_file)
        os.mkdir(os.path.join(tmp_paths["sec"], prefix))
        os.mkdir(os.path.join(tmp_paths["dot"], prefix))
        self.helper.move_all_content(tmp_paths["tmp"],
                                     os.path.join(tmp_paths["sec"], prefix),
                                     ["rss.pdf"])
        self.helper.move_all_content(tmp_paths["tmp"],
                                     os.path.join(tmp_paths["dot"], prefix),
                                     ["dp.pdf"])

    def _run_mountain(self, vienna_util, tmp_paths, dot_file, out):
        '''Run ``mountain.pl``; stdout goes to the open file ``out``.'''
        call([
            os.path.join(vienna_util, "mountain.pl"),
            os.path.join(tmp_paths["tmp"], dot_file)
        ], stdout=out)

    def _plot_mountain(self, mountain, moun_path, tmp_paths, prefix,
                       vienna_util):
        '''Generate one mountain plot per dot plot when ``mountain`` is
        true; the plots are moved to ``<moun_path>/<prefix>``.'''
        if mountain:
            tmp_moun_path = os.path.join(tmp_paths["main"], moun_path)
            os.mkdir(os.path.join(tmp_moun_path, prefix))
            txt_path = os.path.join(tmp_paths["tmp"], "tmp_txt")
            self.helper.check_make_folder(txt_path)
            print("Generating mountain plot of {0}....".format(prefix))
            for dot_file in os.listdir(tmp_paths["tmp"]):
                if dot_file.endswith("dp.ps"):
                    moun_txt = os.path.join(tmp_paths["tmp"], "mountain.txt")
                    moun_file = dot_file.replace("dp.ps", "mountain.pdf")
                    # Close the text file before it is read for plotting
                    # and also when mountain.pl fails.
                    with open(moun_txt, "w") as out:
                        print("Generating {0}".format(moun_file))
                        self._run_mountain(vienna_util, tmp_paths, dot_file,
                                           out)
                    plot_mountain_plot(moun_txt, moun_file)
                    shutil.move(
                        moun_file,
                        os.path.join(tmp_moun_path, prefix, moun_file))
                    os.remove(moun_txt)

    def _compute_2d_and_energy(self, args_srna, prefixs):
        '''Compute secondary structures / folding energy for all genomes
        and replace the "basic" gff with the energy-annotated one.'''
        print("Running energy calculation....")
        moun_path = os.path.join(args_srna.out_folder, "mountain_plot")
        sec_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "sec_plot")
        dot_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "dot_plot")
        self.helper.remove_all_content(sec_path, None, "dir")
        self.helper.remove_all_content(dot_path, None, "dir")
        self.helper.remove_all_content(moun_path, None, "dir")
        for prefix in prefixs:
            tmp_paths = self._get_seq_sec(self.fasta_path,
                                          args_srna.out_folder, prefix,
                                          sec_path, dot_path,
                                          args_srna.vienna_path)
            self._replot_sec_to_pdf(args_srna.vienna_util, tmp_paths,
                                    args_srna.ps2pdf14_path, prefix)
            self._plot_mountain(args_srna.mountain, moun_path, tmp_paths,
                                prefix, args_srna.vienna_util)
            self.helper.remove_all_content(os.getcwd(), ".ps", "file")
            # _get_seq_sec changed the working directory; go back.
            os.chdir(tmp_paths["main"])
            shutil.move("_".join([self.prefixs["energy"], prefix]),
                        "_".join([self.prefixs["basic"], prefix]))
            shutil.rmtree(os.path.join(args_srna.out_folder, "tmp_srna"))

    def _run_blast(self, blast_path, program, database, e, seq_file,
                   blast_file, strand):
        '''Run one BLAST search and write the result to ``blast_file``.'''
        call([
            os.path.join(blast_path, program), "-db", database, "-evalue",
            str(e), "-strand", strand, "-query", seq_file, "-out", blast_file
        ])

    def _get_strand_fasta(self, seq_file, out_folder):
        '''Split ``seq_file`` by strand.

        A record belongs to the plus (minus) strand when its header line
        ends with "+" ("-").  Returns the paths of the two fasta files
        (``tmp_plus.fa``, ``tmp_minus.fa``) written to ``out_folder``.
        '''
        tmp_plus = os.path.join(out_folder, "tmp_plus.fa")
        tmp_minus = os.path.join(out_folder, "tmp_minus.fa")
        strand = ""
        with open(tmp_plus, "w") as out_p, open(tmp_minus, "w") as out_m, \
                open(seq_file) as sh:
            for line in sh:
                line = line.strip()
                if line.startswith(">"):
                    if line[-1] == "+":
                        out_p.write(line + "\n")
                        strand = "plus"
                    elif line[-1] == "-":
                        out_m.write(line + "\n")
                        strand = "minus"
                else:
                    if strand == "plus":
                        out_p.write(line + "\n")
                    elif strand == "minus":
                        out_m.write(line + "\n")
        return tmp_plus, tmp_minus

    def _blast(self, database, database_format, data_type, args_srna,
               prefixs, program, database_type, e):
        '''BLAST the sRNA candidates of every genome against ``database``
        and replace the "basic" gff with the annotated result.

        For the "nr" database the plus and minus strand sequences are
        searched separately and the two results are merged.
        '''
        if (database is None):
            print("Error: No database assigned!")
        else:
            if database_format:
                self._formatdb(database, data_type, args_srna.out_folder,
                               args_srna.blast_path, database_type)
            for prefix in prefixs:
                blast_file = os.path.join(
                    args_srna.out_folder, "blast_result_and_misc",
                    "_".join([database_type, "blast", prefix + ".txt"]))
                srna_file = "_".join([self.prefixs["basic"], prefix])
                out_file = os.path.join(
                    args_srna.out_folder,
                    "_".join(["tmp", database_type, prefix]))
                print("Running Blast of {0} in {1}".format(prefix, database))
                seq_file = os.path.join(args_srna.out_folder,
                                        "_".join(["sRNA_seq", prefix]))
                # NOTE(review): seq_file is a joined path while os.listdir
                # yields bare names, so this test looks always true and
                # the sequences are extracted on every call -- confirm
                # whether that is intended before changing it.
                if seq_file not in os.listdir(args_srna.out_folder):
                    self.helper.get_seq(
                        srna_file,
                        os.path.join(self.fasta_path, prefix + ".fa"),
                        seq_file)
                if database_type == "nr":
                    tmp_plus, tmp_minus = self._get_strand_fasta(
                        seq_file, args_srna.out_folder)
                    tmp_blast = os.path.join("tmp_blast.txt")
                    self._run_blast(args_srna.blast_path, program, database,
                                    e, tmp_plus, tmp_blast, "plus")
                    self._run_blast(args_srna.blast_path, program, database,
                                    e, tmp_minus, blast_file, "minus")
                    self.helper.merge_file(tmp_blast, blast_file)
                    os.remove(tmp_blast)
                    os.remove(tmp_plus)
                    os.remove(tmp_minus)
                else:
                    self._run_blast(args_srna.blast_path, program, database,
                                    e, seq_file, blast_file, "both")
                extract_blast(blast_file, srna_file, out_file,
                              out_file + ".csv", database_type)
                shutil.move(out_file, srna_file)

    def _class_srna(self, prefixs, args_srna):
        '''classify the sRNA based on the filters

        Nothing is done when no filter information at all is available.
        '''
        if (args_srna.import_info is not None) or (
                args_srna.srna_database is not None) or (
                args_srna.nr_database is not None) or (
                self.sorf_path is not None) or (
                self.tss_path is not None) or (
                self.term_path is not None) or (
                args_srna.promoter_table is not None):
            for prefix in prefixs:
                print("classifying sRNA of {0}".format(prefix))
                class_gff = os.path.join(self.gff_output, "for_class")
                class_table = os.path.join(self.table_output, "for_class")
                self.helper.check_make_folder(
                    os.path.join(class_table, prefix))
                self.helper.check_make_folder(os.path.join(class_gff, prefix))
                class_gff = os.path.join(class_gff, prefix)
                class_table = os.path.join(class_table, prefix)
                self.helper.check_make_folder(class_table)
                self.helper.check_make_folder(class_gff)
                out_stat = os.path.join(
                    self.stat_path,
                    "_".join(["stat_sRNA_class", prefix + ".csv"]))
                classify_srna(
                    os.path.join(self.all_best["all_gff"],
                                 "_".join([prefix, "sRNA.gff"])),
                    class_gff, out_stat, args_srna)
                for srna in os.listdir(class_gff):
                    out_table = os.path.join(class_table,
                                             srna.replace(".gff", ".csv"))
                    gen_srna_table(
                        os.path.join(class_gff, srna),
                        "_".join([self.prefixs["merge_table"], prefix]),
                        "_".join([self.tmps["nr"], prefix + ".csv"]),
                        "_".join([self.tmps["srna"], prefix + ".csv"]),
                        args_srna, out_table, self.term_path)

    def _get_best_result(self, prefixs, args_srna):
        '''get the best results based on the filters'''
        for prefix in prefixs:
            best_gff = os.path.join(self.all_best["best_gff"],
                                    "_".join([prefix, "sRNA.gff"]))
            best_table = os.path.join(self.all_best["best_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            gen_best_srna(
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])),
                best_gff, args_srna)
            gen_srna_table(
                os.path.join(self.all_best["best_gff"],
                             "_".join([prefix, "sRNA.gff"])),
                "_".join([self.prefixs["merge_table"], prefix]),
                "_".join([self.tmps["nr"], prefix + ".csv"]),
                "_".join([self.tmps["srna"], prefix + ".csv"]),
                args_srna, best_table, self.term_path)

    def _remove_file(self, args_srna):
        '''Remove the temporary files and folders of this run.'''
        self.helper.remove_all_content(args_srna.out_folder, "tmp_", "dir")
        self.helper.remove_all_content(args_srna.out_folder, "tmp_", "file")
        self.helper.remove_tmp(args_srna.fastas)
        self.helper.remove_tmp(args_srna.gffs)
        self.helper.remove_tmp(self.gff_output)
        if args_srna.frag_wigs is not None:
            self.helper.remove_tmp(args_srna.frag_wigs)
        if args_srna.tex_wigs is not None:
            self.helper.remove_tmp(args_srna.tex_wigs)
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            shutil.rmtree(args_srna.merge_wigs)
        self.helper.remove_tmp(args_srna.trans)
        if args_srna.tss_folder is not None:
            self.helper.remove_tmp(args_srna.tss_folder)
        if args_srna.pro_folder is not None:
            self.helper.remove_tmp(args_srna.pro_folder)
        if args_srna.sorf_file is not None:
            self.helper.remove_tmp(args_srna.sorf_file)
        if "tmp_median" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
        if self.term_path is not None:
            self.helper.remove_tmp(args_srna.terms)

    def _filter_srna(self, args_srna, prefixs):
        '''set the filter of sRNA

        Runs, in this order and each only when configured: secondary
        structure / energy computation, BLAST against nr, comparison
        with sORFs, BLAST against the sRNA database.
        '''
        if args_srna.import_info is not None:
            if "sec_str" in args_srna.import_info:
                self._compute_2d_and_energy(args_srna, prefixs)
        if args_srna.nr_database is not None:
            self._blast(args_srna.nr_database, args_srna.nr_format, "prot",
                        args_srna, prefixs, "blastx", "nr", args_srna.e_nr)
        if self.sorf_path is not None:
            for prefix in prefixs:
                if ("_".join([prefix, "sORF.gff"]) in
                        os.listdir(self.sorf_path)):
                    tmp_srna = os.path.join(
                        args_srna.out_folder,
                        "".join(["tmp_srna_sorf", prefix]))
                    tmp_sorf = os.path.join(
                        args_srna.out_folder,
                        "".join(["tmp_sorf_srna", prefix]))
                    srna_sorf_comparison(
                        "_".join([self.prefixs["basic"], prefix]),
                        os.path.join(self.sorf_path,
                                     "_".join([prefix, "sORF.gff"])),
                        tmp_srna, tmp_sorf)
                    os.remove(tmp_sorf)
                    shutil.move(tmp_srna,
                                "_".join([self.prefixs["basic"], prefix]))
        if args_srna.srna_database is not None:
            self._blast(args_srna.srna_database, args_srna.srna_format,
                        "nucl", args_srna, prefixs, "blastn", "sRNA",
                        args_srna.e_srna)

    def _import_info_format(self, import_info):
        '''Return the entries of ``import_info`` in lower case.'''
        return [info.lower() for info in import_info]

    def _gen_table(self, prefixs, args_srna):
        '''Generate the table of all candidates for every genome.'''
        for prefix in prefixs:
            out_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            gen_srna_table(
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])),
                "_".join([self.prefixs["merge_table"], prefix]),
                "_".join([self.tmps["nr"], prefix + ".csv"]),
                "_".join([self.tmps["srna"], prefix + ".csv"]),
                args_srna, out_table, self.term_path)

    def _print_rank_all(self, prefixs):
        '''Call ``print_rank_all`` on the all / best tables per genome.'''
        for prefix in prefixs:
            all_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            best_table = os.path.join(self.all_best["best_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            print_rank_all(all_table, best_table)

    def _filter_min_utr(self, prefixs, min_utr):
        '''filter out the low expressed UTR-derived sRNA'''
        for prefix in prefixs:
            filter_utr(
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])),
                os.path.join(self.all_best["all_table"],
                             "_".join([prefix, "sRNA.csv"])), min_utr)

    def _antisense(self, gffs, prefixs):
        '''detection of antisense'''
        for prefix in prefixs:
            all_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            best_table = os.path.join(self.all_best["best_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            all_gff = os.path.join(self.all_best["all_gff"],
                                   "_".join([prefix, "sRNA.gff"]))
            best_gff = os.path.join(self.all_best["best_gff"],
                                    "_".join([prefix, "sRNA.gff"]))
            srna_antisense(all_gff, all_table,
                           os.path.join(gffs, prefix + ".gff"))
            srna_antisense(best_gff, best_table,
                           os.path.join(gffs, prefix + ".gff"))

    def _blast_stat(self, stat_path, srna_tables):
        '''do statistics for blast result'''
        for srna_table in os.listdir(os.path.join(srna_tables, "best")):
            out_srna_blast = os.path.join(
                stat_path, "stat_" + srna_table.replace(".csv", "_blast.csv"))
            blast_class(os.path.join(srna_tables, "best", srna_table),
                        out_srna_blast)

    def _compare_term_promoter(self, out_table, prefix, args_srna):
        '''compare sRNA with terminator and promoter'''
        if self.term_path is not None:
            compare_srna_term(
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])), out_table,
                os.path.join(self.term_path, "_".join([prefix, "term.gff"])),
                args_srna.fuzzy_b, args_srna.fuzzy_a)
        if (args_srna.promoter_table is not None):
            compare_srna_promoter(
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])), out_table,
                args_srna)

    def _wants_blast_stat(self, args_srna):
        '''Return True when BLAST statistics should be generated.

        That requires an sRNA database and "blast_srna" among the
        imported information; a missing ``import_info`` (None) simply
        means "no" instead of raising a TypeError.
        '''
        return ((args_srna.srna_database is not None) and
                (args_srna.import_info is not None) and
                ("blast_srna" in args_srna.import_info))

    def run_srna_detection(self, args_srna):
        '''Entry point: run the complete sRNA detection workflow.'''
        self._check_necessary_file(args_srna)
        self.multiparser.parser_gff(args_srna.trans, "transcript")
        self.multiparser.combine_gff(args_srna.gffs, self.tran_path, None,
                                     "transcript")
        if args_srna.import_info is not None:
            args_srna.import_info = self._import_info_format(
                args_srna.import_info)
        prefixs = self._run_program(args_srna)
        self._filter_srna(args_srna, prefixs)
        for prefix in prefixs:
            shutil.copyfile(
                "_".join([self.prefixs["basic"], prefix]),
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])))
            self._compare_term_promoter(
                "_".join([self.prefixs["merge_table"], prefix]), prefix,
                args_srna)
        self._gen_table(prefixs, args_srna)
        self._class_srna(prefixs, args_srna)
        self._get_best_result(prefixs, args_srna)
        self._print_rank_all(prefixs)
        if self._wants_blast_stat(args_srna):
            self._blast_stat(self.stat_path, self.table_output)
        self._remove_file(args_srna)
def __init__(self, args):
    """Keep the parsed arguments and build the shared helper objects.

    The project paths are derived from ``args.project_path``.
    """
    self._args = args
    project_path = args.project_path
    self._paths = Paths(project_path)
    self.args_container = ArgsContainer()
    self.helper = Helper()
class Controller(object):
    """Manage the actions of the subcommands.

    The Controller takes care of validating the command line arguments,
    creating the output folders and handing the arguments (such as path
    names) over to the module that performs the actual task.
    """

    # Values accepted for --transfer_type by the ``ratt`` subcommand.
    _TRANSFER_TYPES = ("Strain", "Assembly", "Species",
                       "Assembly.Repetitive", "Strain.Repetitive",
                       "Species.Repetitive", "Multiple", "Free")

    def __init__(self, args):
        """Create an instance from the parsed command line arguments."""
        self._args = args
        self._paths = Paths(args.project_path)
        self.args_container = ArgsContainer()
        self.helper = Helper()

    # ------------------------------------------------------------------
    # validation helpers
    # ------------------------------------------------------------------
    def _fail(self, message):
        """Print ``message`` and terminate through ``sys.exit()``."""
        print(message)
        sys.exit()

    def _check_existing_folder(self, folder):
        """Exit unless ``folder`` exists and contains at least one entry."""
        if not os.path.exists(folder):
            self._fail("Error: There is wrong folder, please check it!!")
        if not os.listdir(folder):
            self._fail("Error: There is empty folder, please check it!!")

    def check_folder(self, folders):
        """Exit unless every required folder is set, exists and is filled.

        folders -- iterable of folder paths; ``None`` counts as an error.
        """
        for folder in folders:
            if folder is None:
                self._fail("Error: There is wrong path of folder assigned, "
                           "please check it!!")
            self._check_existing_folder(folder)

    def check_parameter(self, paras, names):
        """Exit if any value in ``paras`` is ``None``.

        names -- option names parallel to ``paras``, used in the message.
        """
        for para, name in zip(paras, names):
            if para is None:
                self._fail("Error: {0} is wrong, "
                           "please check it!!".format(name))

    def check_no_require_folder(self, folders):
        """Validate optional folders; ``None`` entries are skipped."""
        for folder in folders:
            if folder is not None:
                self._check_existing_folder(folder)

    def check_file(self, files, names, require):
        """Exit if a given path is not an existing file.

        files   -- list of file paths (entries may be ``None``).
        names   -- option names parallel to ``files``; a single string is
                   accepted as well and is used for every entry.
        require -- when true, a ``None`` entry is an error too; otherwise
                   ``None`` entries are skipped.
        """
        if isinstance(names, str):
            # A bare string would be indexed per character ("-"), so
            # repeat it to report the full option name.
            names = [names] * len(files)
        for path, name in zip(files, names):
            if path is None:
                if require:
                    self._fail("Error: {0} is wrong, "
                               "please check it!!".format(name))
                continue
            if not os.path.isfile(path):
                self._fail("Error: There is wrong path of {0}, "
                           "please check it!!".format(name))

    def _prepare_tss_output(self, program, tss_message, processing_message):
        """Announce the run, create the subfolders, return the out folder.

        ``program`` is compared case-insensitively with "tss" and
        "processing_site"; any other value terminates the program.
        """
        kind = program.lower()
        if kind == "tss":
            print(tss_message)
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            return self._paths.tsspredator_folder
        if kind == "processing_site":
            print(processing_message)
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
            return out_folder
        self._fail("Error:No such program!!!!")

    # ------------------------------------------------------------------
    # subcommands
    # ------------------------------------------------------------------
    def create_project(self, version):
        """Create a new project and write ``version`` to its version file."""
        project_creator.create_root_folder(self._args.project_path)
        project_creator.create_subfolders(
            self._paths.required_folders("root"))
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        project_creator.create_version_file(
            self._paths.version_path, version)
        sys.stdout.write("Created folder \"%s\" and required subfolders.\n" % (
            self._args.project_path))

    def get_input(self):
        """Download required files from website."""
        args = self._args
        print("Running get input files...")
        if args.FTP_path is None:
            self._fail(
                "Error: Please assign the path for downloading the data!!")
        if args.for_target:
            annotation_folder = self._paths.tar_annotation_folder
            fasta_folder = self._paths.tar_fasta_folder
        else:
            annotation_folder = self._paths.ref_annotation_folder
            fasta_folder = self._paths.ref_fasta_folder
        self.helper.check_make_folder(annotation_folder)
        self.helper.check_make_folder(fasta_folder)
        # (flag, destination folder, suffixes handed to get_file); the
        # order of the entries is the order of the get_file calls.
        downloads = (
            (args.ref_gff, annotation_folder, ("gff", "_genomic.gff.gz")),
            (args.ref_fasta, fasta_folder, ("fna", "_genomic.fna.gz")),
            (args.ref_gbk, annotation_folder,
             ("gbk", "gbff", "_genomic.gbff.gz")),
            (args.ref_ptt, annotation_folder, ("ptt",)),
            (args.ref_rnt, annotation_folder, ("rnt",)),
        )
        for wanted, folder, suffixes in downloads:
            if wanted is True:
                for suffix in suffixes:
                    get_file(args.FTP_path, folder, suffix, args.for_target)
        if args.convert_embl is True:
            if not os.listdir(annotation_folder):
                sys.stdout.write("No gbk files!!\n")
            else:
                Converter().convert_gbk2embl(annotation_folder)

    def get_target_fasta(self):
        """Get target fasta"""
        args = self._args
        print("Running get target fasta...")
        self.check_parameter([args.output_format], ["--output_format"])
        self.check_folder([args.ref_fasta_folder])
        self.check_file([args.mutation_table], ["--mutation_table"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        # Strip the blanks around each comma separated entry.
        outputs = [output.strip()
                   for output in args.output_format.split(",")]
        target = TargetFasta(self._paths.tar_fasta_folder,
                             args.ref_fasta_folder)
        target.get_target_fasta(
            args.mutation_table, self._paths.tar_fasta_folder,
            args.ref_fasta_folder, outputs)

    def ratt(self):
        """Run RATT to transfer annotation file from reference to target."""
        args = self._args
        print("Running annotation transfer...")
        if args.transfer_type not in self._TRANSFER_TYPES:
            self._fail("Error: please assign correct --transfer_type!!")
        self.check_folder([args.ref_embl_gbk, args.target_fasta,
                           args.ref_fasta])
        self.check_parameter([args.element, args.compare_pair],
                             ["--element", "--compare_pair"])
        project_creator.create_subfolders(
            self._paths.required_folders("annotation_transfer"))
        args_ratt = self.args_container.container_ratt(
            args.RATT_path, args.element, args.transfer_type,
            args.ref_embl_gbk, args.target_fasta, args.ref_fasta,
            self._paths.ratt_folder, args.convert_to_gff_rnt_ptt,
            self._paths.tar_annotation_folder, args.compare_pair)
        ratt = RATT(args_ratt)
        ratt.annotation_transfer(args_ratt)

    def tsspredator(self):
        """Run TSSpredator for predicting TSS candidates."""
        args = self._args
        self.check_folder([args.fasta_folder, args.annotation_folder,
                           args.wig_folder])
        self.check_parameter([args.lib, args.output_prefix],
                             ["--lib", "--output_prefix"])
        self.check_no_require_folder([args.compare_transcript_assembly,
                                      args.reference_gff_folder])
        self.check_file([args.merge_manual], ["--merge_manual"], False)
        out_folder = self._prepare_tss_output(
            args.compute_program,
            "Running TSS prediction...",
            "Running processing site prediction...")
        args_tss = self.args_container.container_tsspredator(
            args.TSSpredator_path, args.compute_program,
            args.fasta_folder, args.annotation_folder, args.wig_folder,
            args.lib, args.output_prefix, args.height,
            args.height_reduction, args.factor, args.factor_reduction,
            args.base_height, args.enrichment_factor,
            args.processing_factor, args.replicate_match, out_folder,
            args.statistics, args.validate_gene, args.merge_manual,
            args.compare_transcript_assembly, args.fuzzy,
            args.utr_length, args.cluster, args.length,
            args.re_check_orphan, args.overlap_feature,
            args.reference_gff_folder, args.remove_low_expression)
        tsspredator = TSSpredator(args_tss)
        tsspredator.run_tsspredator(args_tss)

    def optimize(self):
        """Optimize the parameters of TSSpredator."""
        args = self._args
        self.check_folder([args.wig_folder, args.fasta_file,
                           args.annotation_file])
        self.check_file([args.manual], ["--manual"], True)
        self.check_parameter([args.strain_name, args.lib,
                              args.output_prefix],
                             ["--strain_name", "--lib", "--output_prefix"])
        out_folder = self._prepare_tss_output(
            args.program,
            "Running optimization of TSS prediction...",
            "Running optimization of processing site prediction...")
        args_ops = self.args_container.container_optimize(
            args.TSSpredator_path, args.fasta_file, args.annotation_file,
            args.wig_folder, args.manual, out_folder, args.strain_name,
            args.max_height, args.max_height_reduction, args.max_factor,
            args.max_factor_reduction, args.max_base_height,
            args.max_enrichment_factor, args.max_processing_factor,
            args.utr_length, args.lib, args.output_prefix, args.cluster,
            args.length, args.core, args.program, args.replicate_match,
            args.steps)
        optimize_tss(args_ops)

    def color(self):
        """Color the screenshots."""
        args = self._args
        print("Running png files coloring...")
        self.check_parameter([args.track_number], ["--track_number"])
        self.check_folder([args.screenshot_folder])
        color = ColorPNG()
        color.generate_color_png(
            args.track_number, args.screenshot_folder,
            args.ImageMagick_covert_path)

    def terminator(self):
        """Run TransTermHP for detecting terminators."""
        args = self._args
        print("Running terminator prediction...")
        if args.TransTermHP_path is None:
            # NOTE(review): this only warns and keeps going; confirm
            # whether a missing TransTermHP path should abort the run.
            print("Please assign the folder where you install TransTermHP.")
        self.check_folder([args.fasta_folder, args.annotation_folder,
                           args.transcript_folder])
        self.check_no_require_folder([args.sRNA])
        project_creator.create_subfolders(
            self._paths.required_folders("terminator"))
        args_term = self.args_container.container_terminator(
            args.TransTermHP_path, args.expterm_path, args.RNAfold_path,
            self._paths.transterm_folder, args.fasta_folder,
            args.annotation_folder, args.transcript_folder, args.sRNA,
            args.statistics, args.tex_wig_folder, args.frag_wig_folder,
            args.decrease, args.highest_coverage,
            args.fuzzy_detect_coverage, args.fuzzy_within_transcript,
            args.fuzzy_downstream_transcript, args.fuzzy_within_gene,
            args.fuzzy_downstream_gene, self._paths.transtermhp_folder,
            args.tex_notex_libs, args.frag_libs, args.tex_notex,
            args.replicates_tex, args.replicates_frag, args.table_best,
            args.min_loop_length, args.max_loop_length,
            args.min_stem_length, args.max_stem_length,
            args.min_U_tail_length, args.miss_rate, args.range_U_tail)
        terminator = Terminator(args_term)
        terminator.run_terminator(args_term)

    def transcript(self):
        """Run Transcriptome assembly."""
        args = self._args
        print("Running transcript assembly...")
        self.check_folder([args.annotation_folder])
        self.check_no_require_folder([
            args.compare_TSS, args.compare_genome_annotation,
            args.terminator_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("transcript_assembly"))
        args_tran = self.args_container.container_transcript(
            args.frag_wig_path, args.tex_wig_path, args.tex_notex,
            args.length, args.annotation_folder, args.height, args.width,
            args.tolerance, args.tolerance_coverage, args.replicates_tex,
            args.replicates_frag,
            self._paths.transcript_assembly_output_folder,
            args.compare_TSS, args.compare_genome_annotation,
            args.TSS_fuzzy, args.Tex_treated_libs, args.fragmented_libs,
            args.compare_feature_genome, args.table_best,
            args.terminator_folder, args.fuzzy_term)
        transcript = TranscriptAssembly(args_tran)
        transcript.run_transcript_assembly(args_tran)

    def utr_detection(self):
        """Run UTR detection."""
        args = self._args
        print("Running UTR detection...")
        self.check_folder([args.annotation_folder,
                           args.transcript_assembly_folder,
                           args.TSS_folder])
        self.check_no_require_folder([args.terminator_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("utr"))
        args_utr = self.args_container.container_utr(
            args.TSS_folder, args.annotation_folder,
            args.transcript_assembly_folder, args.terminator_folder,
            args.terminator_fuzzy, self._paths.utr_folder,
            args.TSS_source, args.base_5UTR, args.UTR_length,
            args.base_3UTR)
        utr = UTRDetection(args_utr)
        utr.run_utr_detection(args_utr)

    def srna_detection(self):
        """Run sRNA detection."""
        args = self._args
        print("Running sRNA prediction...")
        self.check_folder([args.annotation_folder,
                           args.transcript_assembly_folder])
        self.check_no_require_folder([args.fasta_folder, args.sORF,
                                      args.terminator_folder])
        self.check_file([args.promoter_table], ["--promoter_table"], False)
        # TSS and processing site folders are mandatory only when
        # UTR-derived sRNAs are requested.
        if args.UTR_derived_sRNA:
            self.check_folder([args.TSS_folder,
                               args.processing_site_folder])
        else:
            self.check_no_require_folder([args.TSS_folder,
                                          args.processing_site_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("srna"))
        args_srna = self.args_container.container_srna(
            args.Vienna_folder, args.Vienna_utils, args.blast_plus_folder,
            args.ps2pdf14_path, self._paths.srna_folder,
            args.UTR_derived_sRNA, args.annotation_folder,
            args.TSS_folder, args.transcript_assembly_folder,
            args.TSS_intergenic_fuzzy, args.TSS_5UTR_fuzzy,
            args.TSS_3UTR_fuzzy, args.TSS_interCDS_fuzzy,
            args.import_info, args.tex_wig_folder, args.frag_wig_folder,
            args.processing_site_folder, args.fasta_folder,
            args.mountain_plot, args.nr_format, args.srna_format,
            args.sRNA_database_path, args.nr_database_path,
            args.cutoff_energy, args.run_intergenic_TEX_coverage,
            args.run_intergenic_noTEX_coverage,
            args.run_intergenic_fragmented_coverage,
            args.run_antisense_TEX_coverage,
            args.run_antisense_noTEX_coverage,
            args.run_antisense_fragmented_coverage,
            args.intergenic_tolerance, args.run_utr_TEX_coverage,
            args.run_utr_noTEX_coverage,
            args.run_utr_fragmented_coverage, args.max_length,
            args.min_length, args.tex_notex_libs, args.frag_libs,
            args.replicates_tex, args.replicates_frag, args.tex_notex,
            args.blast_e_nr, args.blast_e_srna, args.detect_sRNA_in_CDS,
            args.table_best, args.decrease_intergenic, args.decrease_utr,
            args.fuzzy_intergenic, args.fuzzy_utr, args.cutoff_nr_hit,
            args.sORF, args.best_with_all_sRNAhit,
            args.best_without_sORF_candidate, args.overlap_percent_CDS,
            args.terminator_folder, args.terminator_fuzzy_in_CDS,
            args.terminator_fuzzy_out_CDS, args.best_with_terminator,
            args.ignore_hypothetical_protein, args.TSS_source,
            args.min_utr_coverage, args.promoter_table,
            args.best_with_promoter, args.ranking_time_promoter,
            args.promoter_name)
        srna = sRNADetection(args_srna)
        srna.run_srna_detection(args_srna)

    def sorf_detection(self):
        """Run sORF detection."""
        args = self._args
        print("Running sORF prediction...")
        self.check_folder([args.transcript_assembly_folder,
                           args.annotation_folder, args.fasta_folder])
        self.check_no_require_folder([args.sRNA_folder, args.TSS_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("sorf"))
        args_sorf = self.args_container.container_sorf(
            self._paths.sorf_folder, args.UTR_derived_sORF,
            args.transcript_assembly_folder, args.annotation_folder,
            args.TSS_folder, args.utr_length, args.min_length,
            args.max_length, args.tex_wig_folder, args.frag_wig_folder,
            args.cutoff_intergenic_coverage,
            args.cutoff_antisense_coverage, args.cutoff_5utr_coverage,
            args.cutoff_3utr_coverage, args.cutoff_interCDS_coverage,
            args.fasta_folder, args.tex_notex_libs, args.frag_libs,
            args.tex_notex, args.replicates_tex, args.replicates_frag,
            args.table_best, args.sRNA_folder, args.start_codon,
            args.stop_codon, args.cutoff_background, args.fuzzy_rbs,
            args.rbs_not_after_TSS, args.print_all_combination,
            args.best_no_sRNA, args.best_no_TSS,
            args.ignore_hypothetical_protein, args.min_rbs_distance,
            args.max_rbs_distance)
        sorf = sORFDetection(args_sorf)
        sorf.run_sorf_detection(args_sorf)

    def meme(self):
        """Run promoter detection."""
        args = self._args
        print("Running promoter detection...")
        self.check_folder([args.TSS_folder, args.fasta_folder])
        if not args.TSS_source:
            self.check_folder([args.annotation_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("promoter"))
        args_pro = self.args_container.container_promoter(
            args.MEME_path, self._paths.promoter_output_folder,
            args.tex_libs, args.TSS_folder, args.fasta_folder,
            args.num_motif, args.nt_before_TSS, args.motif_width,
            args.TSS_source, args.tex_wig_path, args.annotation_folder,
            args.combine_all, args.e_value)
        meme = MEME(args_pro)
        meme.run_meme(args_pro)

    def operon(self):
        """Run operon detection."""
        args = self._args
        print("Running operon detection...")
        self.check_folder([args.TSS_folder, args.annotation_folder,
                           args.transcript_folder, args.UTR5_folder,
                           args.UTR3_folder])
        self.check_no_require_folder([args.term_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("operon"))
        args_op = self.args_container.container_operon(
            args.TSS_folder, args.annotation_folder,
            args.transcript_folder, args.UTR5_folder, args.UTR3_folder,
            args.term_folder, args.TSS_fuzzy, args.term_fuzzy,
            args.min_length, args.statistics,
            self._paths.operon_output_folder, args.combine_gff,
            self._paths.operon_statistics_folder)
        operon = OperonDetection(args_op)
        operon.run_operon(args_op)

    def circrna(self):
        """Run circRNA detection."""
        args = self._args
        print("Running circular RNA prediction...")
        self.check_folder([args.fasta_path, args.annotation_path])
        self.check_no_require_folder([args.tex_bam_path,
                                      args.fragmented_bam_path])
        project_creator.create_subfolders(
            self._paths.required_folders("circrna"))
        args_circ = self.args_container.container_circrna(
            args.align, args.process, args.fasta_path,
            args.annotation_path, args.tex_bam_path,
            args.fragmented_bam_path, self._paths.read_folder,
            self._paths.circrna_stat_folder, args.support_reads,
            args.segemehl_folder, args.samtools_path, args.start_ratio,
            args.end_ratio, args.ignore_hypothetical_protein,
            self._paths.circrna_output_folder)
        circ = CircRNADetection(args_circ)
        circ.run_circrna(args_circ)

    def goterm(self):
        """Run GO term discovery."""
        args = self._args
        print("Running GO term mapping...")
        self.check_folder([args.annotation_path])
        self.check_no_require_folder([args.transcript_path])
        self.check_file([args.UniProt_id, args.go_obo, args.goslim_obo],
                        ["--UniProt_id", "--go.obo", "--goslim_obo"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("go_term"))
        args_go = self.args_container.container_goterm(
            args.annotation_path, self._paths.goterm_output_folder,
            args.UniProt_id, args.go_obo, args.goslim_obo,
            args.transcript_path)
        goterm = GoTermFinding(args_go)
        goterm.run_go_term(args_go)

    def srna_target(self):
        """Run sRNA target prediction."""
        args = self._args
        print("Running sRNA target prediction...")
        self.check_folder([args.fasta_path, args.sRNA_path,
                           args.annotation_path])
        project_creator.create_subfolders(
            self._paths.required_folders("srna_target"))
        args_tar = self.args_container.container_srna_target(
            args.Vienna_folder, args.annotation_path, args.fasta_path,
            args.sRNA_path, args.query_sRNA, args.program,
            args.interaction_length, args.window_size_target,
            args.span_target, args.window_size_srna, args.span_srna,
            args.unstructured_region_RNAplex_target,
            args.unstructured_region_RNAplex_srna,
            args.unstructured_region_RNAup, args.energy_threshold,
            args.duplex_distance, args.top,
            self._paths.starget_output_folder, args.process_rnaplex,
            args.process_rnaup, args.continue_rnaup,
            args.potential_target_start, args.potential_target_end,
            args.target_feature)
        srnatarget = sRNATargetPrediction(args_tar)
        srnatarget.run_srna_target_prediction(args_tar)

    def snp(self):
        """Run SNP/mutation calling."""
        args = self._args
        print("Running SNP/mutations calling...")
        self.check_folder([args.fasta_path])
        if args.bam_type not in ("target", "reference"):
            self._fail("Error: please assign \"target\" or"
                       " \"reference\" to --bam_type!!")
        if args.ploidy not in ("haploid", "diploid"):
            # Abort here as for --bam_type; previously only a message
            # was printed and the run continued with the bad value.
            self._fail("Error: please assign \"haploid\" or"
                       " \"diploid\" to --chromosome_type!!")
        project_creator.create_subfolders(
            self._paths.required_folders("snp"))
        args_snp = self.args_container.container_snp(
            args.samtools_path, args.bcftools_path, args.bam_type,
            args.program, args.fasta_path, args.tex_bam_path,
            args.frag_bam_path, args.quality, args.read_depth,
            self._paths.snp_output_folder, args.indel_fraction,
            args.ploidy)
        snp = SNPCalling(args_snp)
        snp.run_snp_calling(args_snp)

    def ppi(self):
        """Retrieve the protein-protein interaction network."""
        args = self._args
        print("Running protein-protein interaction networks prediction...")
        self.check_folder([args.gff_path])
        self.check_parameter([args.proteinID_strains, args.species_STRING],
                             ["--proteinID_strains", "--species_STRING"])
        project_creator.create_subfolders(
            self._paths.required_folders("ppi_network"))
        args_ppi = self.args_container.container_ppi(
            args.gff_path, args.proteinID_strains,
            args.without_strain_pubmed, args.species_STRING, args.score,
            self._paths.ppi_output_folder, args.node_size, args.query)
        ppi = PPINetwork(self._paths.ppi_output_folder)
        ppi.retrieve_ppi_network(args_ppi)

    def sublocal(self):
        """Run subcellular localization prediction."""
        args = self._args
        print("Running subcellular localization prediction...")
        self.check_folder([args.gff_path, args.fasta_path])
        self.check_no_require_folder([args.transcript_path])
        if args.bacteria_type not in ("positive", "negative"):
            self._fail("Error: please assign \"positive\" or"
                       " \"negative\" to --bacteria_type!!")
        project_creator.create_subfolders(
            self._paths.required_folders("subcellular_localization"))
        args_sub = self.args_container.container_sublocal(
            args.Psortb_path, args.gff_path, args.fasta_path,
            args.bacteria_type, args.difference_multi, args.merge_to_gff,
            self._paths.sublocal_output_folder, args.transcript_path)
        sublocal = SubLocal(args_sub)
        sublocal.run_sub_local(args_sub)

    def ribos(self):
        """Run riboswitch prediction."""
        args = self._args
        print("Running riboswitch prediction...")
        self.check_folder([args.gff_path, args.fasta_path, args.tss_path,
                           args.transcript_path])
        self.check_file([args.riboswitch_ID, args.Rfam],
                        ["--riboswitch_ID", "--Rfam"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("riboswitch"))
        args_ribo = self.args_container.container_ribos(
            args.infernal_path, args.riboswitch_ID, args.gff_path,
            args.fasta_path, args.tss_path, args.transcript_path,
            args.Rfam, self._paths.ribos_output_folder, args.e_value,
            args.output_all, self._paths.database_folder, args.fuzzy,
            args.start_codon, args.min_dist_rbs, args.max_dist_rbs,
            args.fuzzy_rbs, args.UTR_length)
        ribos = Ribos(args_ribo)
        ribos.run_ribos(args_ribo)

    def screen(self):
        """Generate screenshots."""
        args = self._args
        print("Running screenshot generating...")
        self.check_file([args.main_gff, args.fasta],
                        ["--main_gff", "--fasta"], True)
        if args.side_gffs is not None:
            # --side_gffs is a comma separated list of gff files.
            for gff in args.side_gffs.split(","):
                if not os.path.isfile(gff.strip()):
                    self._fail("Error: The --side_gffs no exist!!")
        if args.output_folder is None:
            self._fail("Error: please assign --output_folder!!")
        if args.present not in ("expand", "collapse", "squish"):
            self._fail("Error: please assign \"expand\" or "
                       "\"collapse\" or \"squish\" to --present!!")
        args_sc = self.args_container.container_screen(
            args.main_gff, args.side_gffs, args.fasta,
            args.frag_wig_folder, args.tex_wig_folder, args.height,
            args.tex_libs, args.frag_libs, args.present,
            args.output_folder)
        screen = Screen(args_sc)
        screen.screenshot(args_sc)
class Controller(object): """Manage the actions of the subcommands. The Controller take care of providing the argumentes like path names and the parallel processing of tasks. """ def __init__(self, args): """Create an instance.""" self._args = args self._paths = Paths(args.project_path) self.args_container = ArgsContainer() self.helper = Helper() def check_folder(self, folders): for folder in folders: if folder is None: print("Error: There is wrong path of folder assigned, " "please check it!!") sys.exit() else: if os.path.exists(folder): if len(os.listdir(folder)) == 0: print("Error: There is empty folder, " "please check it!!") sys.exit() else: print("Error: There is wrong folder, please check it!!") sys.exit() def check_parameter(self, paras, names): for i in range(len(paras)): if paras[i] is None: print("Error: {0} is wrong, " "please check it!!".format(names[i])) sys.exit() def check_no_require_folder(self, folders): for folder in folders: if folder is not None: if os.path.exists(folder): if len(os.listdir(folder)) == 0: print("Error: There is empty folder, " "please check it!!") sys.exit() else: print("Error: There is wrong folder, " "please check it!!") sys.exit() def check_file(self, files, names, require): for i in range(len(files)): if require: if files[i] is None: print("Error: {0} is wrong, " "please check it!!".format(names[i])) sys.exit() else: if not os.path.isfile(files[i]): print("Error: There is wrong path of {0}, " "please check it!!".format(names[i])) sys.exit() else: if files[i] is not None: if not os.path.isfile(files[i]): print("Error: There is wrong path of {0}, " "please check it!!".format(names[i])) sys.exit() def create_project(self, version): """Create a new project.""" project_creator.create_root_folder(self._args.project_path) project_creator.create_subfolders(self._paths.required_folders("root")) project_creator.create_subfolders( self._paths.required_folders("get_target_fasta")) 
project_creator.create_version_file(self._paths.version_path, version) sys.stdout.write("Created folder \"%s\" and required subfolders.\n" % (self._args.project_path)) def get_input(self): """Download required files from website.""" print("Running get input files...") if self._args.FTP_path is None: print("Error: Please assign the path for downloading the data!!") sys.exit() if self._args.for_target: annotation_folder = self._paths.tar_annotation_folder fasta_folder = self._paths.tar_fasta_folder else: annotation_folder = self._paths.ref_annotation_folder fasta_folder = self._paths.ref_fasta_folder self.helper.check_make_folder(annotation_folder) self.helper.check_make_folder(fasta_folder) if self._args.ref_gff is True: get_file(self._args.FTP_path, annotation_folder, "gff", self._args.for_target) get_file(self._args.FTP_path, annotation_folder, "_genomic.gff.gz", self._args.for_target) if self._args.ref_fasta is True: get_file(self._args.FTP_path, fasta_folder, "fna", self._args.for_target) get_file(self._args.FTP_path, fasta_folder, "_genomic.fna.gz", self._args.for_target) if self._args.ref_gbk is True: get_file(self._args.FTP_path, annotation_folder, "gbk", self._args.for_target) get_file(self._args.FTP_path, annotation_folder, "gbff", self._args.for_target) get_file(self._args.FTP_path, annotation_folder, "_genomic.gbff.gz", self._args.for_target) if self._args.ref_ptt is True: get_file(self._args.FTP_path, annotation_folder, "ptt", self._args.for_target) if self._args.ref_rnt is True: get_file(self._args.FTP_path, annotation_folder, "rnt", self._args.for_target) if self._args.convert_embl is True: annotation_files = os.listdir(annotation_folder) if len(annotation_files) == 0: sys.stdout.write("No gbk files!!\n") else: Converter().convert_gbk2embl(annotation_folder) def get_target_fasta(self): """Get target fasta""" print("Running get target fasta...") self.check_parameter([self._args.output_format], ["--output_format"]) 
# --- Tail of a method whose ``def`` line lies above this chunk. ---
# The statements are reproduced unchanged; only the layout was restored.
# NOTE(review): other check_file() calls in this file pass the option names
# as a list; here a bare string is passed -- confirm against check_file().
self.check_folder([self._args.ref_fasta_folder])
self.check_file([self._args.mutation_table], "--mutation_table", True)
project_creator.create_subfolders(
    self._paths.required_folders("get_target_fasta"))
outputs = self._args.output_format.split(",")
# NOTE(review): the stripped value is only rebound to the loop variable, so
# ``outputs`` is passed on unstripped -- confirm whether that is intended.
for output in outputs:
    output = output.strip()
target = TargetFasta(self._paths.tar_fasta_folder,
                     self._args.ref_fasta_folder)
target.get_target_fasta(self._args.mutation_table,
                        self._paths.tar_fasta_folder,
                        self._args.ref_fasta_folder, outputs)


def ratt(self):
    """Run RATT to transfer annotation file from reference to target.

    Exits the program when --transfer_type is not one of the accepted
    values.
    """
    print("Running annotation transfer...")
    valid_transfer_types = (
        "Strain", "Assembly", "Species", "Assembly.Repetitive",
        "Strain.Repetitive", "Species.Repetitive", "Multiple", "Free")
    if self._args.transfer_type not in valid_transfer_types:
        print("Error: please assign correct --transfer_type!!")
        sys.exit()
    self.check_folder([
        self._args.ref_embl_gbk, self._args.target_fasta,
        self._args.ref_fasta
    ])
    self.check_parameter([self._args.element, self._args.compare_pair],
                         ["--element", "--compare_pair"])
    project_creator.create_subfolders(
        self._paths.required_folders("annotation_transfer"))
    args_ratt = self.args_container.container_ratt(
        self._args.RATT_path, self._args.element,
        self._args.transfer_type, self._args.ref_embl_gbk,
        self._args.target_fasta, self._args.ref_fasta,
        self._paths.ratt_folder, self._args.convert_to_gff_rnt_ptt,
        self._paths.tar_annotation_folder, self._args.compare_pair)
    ratt = RATT(args_ratt)
    ratt.annotation_transfer(args_ratt)


def tsspredator(self):
    """Run TSSpredator for predicting TSS candidates.

    The output folder depends on --compute_program ("tss" or
    "processing_site", case-insensitive); any other value exits.
    """
    self.check_folder([
        self._args.fasta_folder, self._args.annotation_folder,
        self._args.wig_folder
    ])
    self.check_parameter([self._args.lib, self._args.output_prefix],
                         ["--lib", "--output_prefix"])
    self.check_no_require_folder([
        self._args.compare_transcript_assembly,
        self._args.reference_gff_folder
    ])
    self.check_file([self._args.merge_manual], ["--merge_manual"], False)
    program = self._args.compute_program.lower()
    if program == "tss":
        print("Running TSS prediction...")
        project_creator.create_subfolders(
            self._paths.required_folders("TSS"))
        out_folder = self._paths.tsspredator_folder
    elif program == "processing_site":
        print("Running processing site prediction...")
        out_folder = self._paths.processing_site_folder
        project_creator.create_subfolders(
            self._paths.required_folders("processing"))
    else:
        print("Error:No such program!!!!")
        sys.exit()
    args_tss = self.args_container.container_tsspredator(
        self._args.TSSpredator_path, self._args.compute_program,
        self._args.fasta_folder, self._args.annotation_folder,
        self._args.wig_folder, self._args.lib,
        self._args.output_prefix, self._args.height,
        self._args.height_reduction, self._args.factor,
        self._args.factor_reduction, self._args.base_height,
        self._args.enrichment_factor, self._args.processing_factor,
        self._args.replicate_match, out_folder,
        self._args.statistics, self._args.validate_gene,
        self._args.merge_manual, self._args.compare_transcript_assembly,
        self._args.fuzzy, self._args.utr_length, self._args.cluster,
        self._args.length, self._args.re_check_orphan,
        self._args.overlap_feature, self._args.reference_gff_folder,
        self._args.remove_low_expression)
    tsspredator = TSSpredator(args_tss)
    tsspredator.run_tsspredator(args_tss)


def optimize(self):
    """Optimize the TSSpredator parameters.

    The output folder depends on --program ("tss" or "processing_site",
    case-insensitive); any other value exits.
    """
    self.check_folder([
        self._args.wig_folder, self._args.fasta_file,
        self._args.annotation_file
    ])
    self.check_file([self._args.manual], ["--manual"], True)
    self.check_parameter(
        [self._args.strain_name, self._args.lib, self._args.output_prefix],
        ["--strain_name", "--lib", "--output_prefix"])
    program = self._args.program.lower()
    if program == "tss":
        print("Running optimization of TSS prediction...")
        project_creator.create_subfolders(
            self._paths.required_folders("TSS"))
        out_folder = self._paths.tsspredator_folder
    elif program == "processing_site":
        print("Running optimization of processing site prediction...")
        out_folder = self._paths.processing_site_folder
        project_creator.create_subfolders(
            self._paths.required_folders("processing"))
    else:
        print("Error:No such program!!!!")
        sys.exit()
    args_ops = self.args_container.container_optimize(
        self._args.TSSpredator_path, self._args.fasta_file,
        self._args.annotation_file, self._args.wig_folder,
        self._args.manual, out_folder, self._args.strain_name,
        self._args.max_height, self._args.max_height_reduction,
        self._args.max_factor, self._args.max_factor_reduction,
        self._args.max_base_height, self._args.max_enrichment_factor,
        self._args.max_processing_factor, self._args.utr_length,
        self._args.lib, self._args.output_prefix, self._args.cluster,
        self._args.length, self._args.core, self._args.program,
        self._args.replicate_match, self._args.steps)
    optimize_tss(args_ops)


def color(self):
    """Color the screenshots."""
    print("Running png files coloring...")
    # The label is what check_parameter receives to name the option; it
    # was previously misspelled as "--track_numer".
    self.check_parameter([self._args.track_number], ["--track_number"])
    self.check_folder([self._args.screenshot_folder])
    color = ColorPNG()
    color.generate_color_png(self._args.track_number,
                             self._args.screenshot_folder,
                             self._args.ImageMagick_covert_path)


def terminator(self):
    """Run TransTermHP for detecting terminators.

    Exits the program when no TransTermHP path was assigned.
    """
    print("Running terminator prediction...")
    if self._args.TransTermHP_path is None:
        print("Please assign the folder where you install TransTermHP.")
        # Stop here like every other validation failure in this file,
        # instead of carrying a None path into the terminator run.
        sys.exit()
    self.check_folder([
        self._args.fasta_folder, self._args.annotation_folder,
        self._args.transcript_folder
    ])
    self.check_no_require_folder([self._args.sRNA])
    project_creator.create_subfolders(
        self._paths.required_folders("terminator"))
    args_term = self.args_container.container_terminator(
        self._args.TransTermHP_path, self._args.expterm_path,
        self._args.RNAfold_path, self._paths.transterm_folder,
        self._args.fasta_folder, self._args.annotation_folder,
        self._args.transcript_folder, self._args.sRNA,
        self._args.statistics, self._args.tex_wig_folder,
        self._args.frag_wig_folder, self._args.decrease,
        self._args.highest_coverage, self._args.fuzzy_detect_coverage,
        self._args.fuzzy_within_transcript,
        self._args.fuzzy_downstream_transcript,
        self._args.fuzzy_within_gene,
        self._args.fuzzy_downstream_gene,
        self._paths.transtermhp_folder, self._args.tex_notex_libs,
        self._args.frag_libs, self._args.tex_notex,
        self._args.replicates_tex, self._args.replicates_frag,
        self._args.table_best, self._args.min_loop_length,
        self._args.max_loop_length, self._args.min_stem_length,
        self._args.max_stem_length, self._args.min_U_tail_length,
        self._args.miss_rate, self._args.range_U_tail)
    terminator = Terminator(args_term)
    terminator.run_terminator(args_term)


def transcript(self):
    """Run Transcriptome assembly."""
    print("Running transcript assembly...")
    self.check_folder([self._args.annotation_folder])
    self.check_no_require_folder([
        self._args.compare_TSS, self._args.compare_genome_annotation,
        self._args.terminator_folder
    ])
    project_creator.create_subfolders(
        self._paths.required_folders("transcript_assembly"))
    args_tran = self.args_container.container_transcript(
        self._args.frag_wig_path, self._args.tex_wig_path,
        self._args.tex_notex, self._args.length,
        self._args.annotation_folder, self._args.height,
        self._args.width, self._args.tolerance,
        self._args.tolerance_coverage, self._args.replicates_tex,
        self._args.replicates_frag,
        self._paths.transcript_assembly_output_folder,
        self._args.compare_TSS, self._args.compare_genome_annotation,
        self._args.TSS_fuzzy, self._args.Tex_treated_libs,
        self._args.fragmented_libs, self._args.compare_feature_genome,
        self._args.table_best, self._args.terminator_folder,
        self._args.fuzzy_term)
    transcript = TranscriptAssembly(args_tran)
    transcript.run_transcript_assembly(args_tran)


def utr_detection(self):
    """Run UTR detection."""
    print("Running UTR detection...")
    self.check_folder([
        self._args.annotation_folder,
        self._args.transcript_assembly_folder, self._args.TSS_folder
    ])
    self.check_no_require_folder([self._args.terminator_folder])
    project_creator.create_subfolders(self._paths.required_folders("utr"))
    args_utr = self.args_container.container_utr(
        self._args.TSS_folder, self._args.annotation_folder,
        self._args.transcript_assembly_folder,
        self._args.terminator_folder, self._args.terminator_fuzzy,
        self._paths.utr_folder, self._args.TSS_source,
        self._args.base_5UTR, self._args.UTR_length,
        self._args.base_3UTR)
    utr = UTRDetection(args_utr)
    utr.run_utr_detection(args_utr)


def srna_detection(self):
    """Run sRNA detection.

    The TSS and processing-site folders are checked as required only
    when UTR-derived sRNA detection is switched on.
    """
    print("Running sRNA prediction...")
    self.check_folder([
        self._args.annotation_folder,
        self._args.transcript_assembly_folder
    ])
    self.check_no_require_folder([
        self._args.fasta_folder, self._args.sORF,
        self._args.terminator_folder
    ])
    self.check_file([self._args.promoter_table], ["--promoter_table"],
                    False)
    if self._args.UTR_derived_sRNA:
        self.check_folder(
            [self._args.TSS_folder, self._args.processing_site_folder])
    else:
        self.check_no_require_folder(
            [self._args.TSS_folder, self._args.processing_site_folder])
    project_creator.create_subfolders(self._paths.required_folders("srna"))
    args_srna = self.args_container.container_srna(
        self._args.Vienna_folder, self._args.Vienna_utils,
        self._args.blast_plus_folder, self._args.ps2pdf14_path,
        self._paths.srna_folder, self._args.UTR_derived_sRNA,
        self._args.annotation_folder, self._args.TSS_folder,
        self._args.transcript_assembly_folder,
        self._args.TSS_intergenic_fuzzy, self._args.TSS_5UTR_fuzzy,
        self._args.TSS_3UTR_fuzzy, self._args.TSS_interCDS_fuzzy,
        self._args.import_info, self._args.tex_wig_folder,
        self._args.frag_wig_folder, self._args.processing_site_folder,
        self._args.fasta_folder, self._args.mountain_plot,
        self._args.nr_format, self._args.srna_format,
        self._args.sRNA_database_path, self._args.nr_database_path,
        self._args.cutoff_energy,
        self._args.run_intergenic_TEX_coverage,
        self._args.run_intergenic_noTEX_coverage,
        self._args.run_intergenic_fragmented_coverage,
        self._args.run_antisense_TEX_coverage,
        self._args.run_antisense_noTEX_coverage,
        self._args.run_antisense_fragmented_coverage,
        self._args.intergenic_tolerance,
        self._args.run_utr_TEX_coverage,
        self._args.run_utr_noTEX_coverage,
        self._args.run_utr_fragmented_coverage,
        self._args.max_length, self._args.min_length,
        self._args.tex_notex_libs, self._args.frag_libs,
        self._args.replicates_tex, self._args.replicates_frag,
        self._args.tex_notex, self._args.blast_e_nr,
        self._args.blast_e_srna, self._args.detect_sRNA_in_CDS,
        self._args.table_best, self._args.decrease_intergenic,
        self._args.decrease_utr, self._args.fuzzy_intergenic,
        self._args.fuzzy_utr, self._args.cutoff_nr_hit,
        self._args.sORF, self._args.best_with_all_sRNAhit,
        self._args.best_without_sORF_candidate,
        self._args.overlap_percent_CDS,
        self._args.terminator_folder,
        self._args.terminator_fuzzy_in_CDS,
        self._args.terminator_fuzzy_out_CDS,
        self._args.best_with_terminator,
        self._args.ignore_hypothetical_protein,
        self._args.TSS_source, self._args.min_utr_coverage,
        self._args.promoter_table, self._args.best_with_promoter,
        self._args.ranking_time_promoter, self._args.promoter_name)
    srna = sRNADetection(args_srna)
    srna.run_srna_detection(args_srna)


def sorf_detection(self):
    """Run sORF detection."""
    print("Running sORF prediction...")
    self.check_folder([
        self._args.transcript_assembly_folder,
        self._args.annotation_folder, self._args.fasta_folder
    ])
    self.check_no_require_folder(
        [self._args.sRNA_folder, self._args.TSS_folder])
    project_creator.create_subfolders(self._paths.required_folders("sorf"))
    args_sorf = self.args_container.container_sorf(
        self._paths.sorf_folder, self._args.UTR_derived_sORF,
        self._args.transcript_assembly_folder,
        self._args.annotation_folder, self._args.TSS_folder,
        self._args.utr_length, self._args.min_length,
        self._args.max_length, self._args.tex_wig_folder,
        self._args.frag_wig_folder,
        self._args.cutoff_intergenic_coverage,
        self._args.cutoff_antisense_coverage,
        self._args.cutoff_5utr_coverage,
        self._args.cutoff_3utr_coverage,
        self._args.cutoff_interCDS_coverage,
        self._args.fasta_folder, self._args.tex_notex_libs,
        self._args.frag_libs, self._args.tex_notex,
        self._args.replicates_tex, self._args.replicates_frag,
        self._args.table_best, self._args.sRNA_folder,
        self._args.start_codon, self._args.stop_codon,
        self._args.cutoff_background, self._args.fuzzy_rbs,
        self._args.rbs_not_after_TSS,
        self._args.print_all_combination, self._args.best_no_sRNA,
        self._args.best_no_TSS,
        self._args.ignore_hypothetical_protein,
        self._args.min_rbs_distance, self._args.max_rbs_distance)
    sorf = sORFDetection(args_sorf)
    sorf.run_sorf_detection(args_sorf)


def meme(self):
    """Run promoter detection.

    The annotation folder is checked only when --TSS_source is falsy.
    """
    print("Running promoter detection...")
    self.check_folder([self._args.TSS_folder, self._args.fasta_folder])
    if not self._args.TSS_source:
        self.check_folder([self._args.annotation_folder])
    project_creator.create_subfolders(
        self._paths.required_folders("promoter"))
    args_pro = self.args_container.container_promoter(
        self._args.MEME_path, self._paths.promoter_output_folder,
        self._args.tex_libs, self._args.TSS_folder,
        self._args.fasta_folder, self._args.num_motif,
        self._args.nt_before_TSS, self._args.motif_width,
        self._args.TSS_source, self._args.tex_wig_path,
        self._args.annotation_folder, self._args.combine_all,
        self._args.e_value)
    meme = MEME(args_pro)
    meme.run_meme(args_pro)


def operon(self):
    """Run operon detection."""
    print("Running operon detection...")
    self.check_folder([
        self._args.TSS_folder, self._args.annotation_folder,
        self._args.transcript_folder, self._args.UTR5_folder,
        self._args.UTR3_folder
    ])
    self.check_no_require_folder([self._args.term_folder])
    project_creator.create_subfolders(
        self._paths.required_folders("operon"))
    args_op = self.args_container.container_operon(
        self._args.TSS_folder, self._args.annotation_folder,
        self._args.transcript_folder, self._args.UTR5_folder,
        self._args.UTR3_folder, self._args.term_folder,
        self._args.TSS_fuzzy, self._args.term_fuzzy,
        self._args.min_length, self._args.statistics,
        self._paths.operon_output_folder, self._args.combine_gff,
        self._paths.operon_statistics_folder)
    operon = OperonDetection(args_op)
    operon.run_operon(args_op)


def circrna(self):
    """Run circRNA detection."""
    print("Running circular RNA prediction...")
    self.check_folder([self._args.fasta_path, self._args.annotation_path])
    self.check_no_require_folder(
        [self._args.tex_bam_path, self._args.fragmented_bam_path])
    project_creator.create_subfolders(
        self._paths.required_folders("circrna"))
    args_circ = self.args_container.container_circrna(
        self._args.align, self._args.process, self._args.fasta_path,
        self._args.annotation_path, self._args.tex_bam_path,
        self._args.fragmented_bam_path, self._paths.read_folder,
        self._paths.circrna_stat_folder, self._args.support_reads,
        self._args.segemehl_folder, self._args.samtools_path,
        self._args.start_ratio, self._args.end_ratio,
        self._args.ignore_hypothetical_protein,
        self._paths.circrna_output_folder)
    circ = CircRNADetection(args_circ)
    circ.run_circrna(args_circ)


def goterm(self):
    """Run GO term discovery."""
    print("Running GO term mapping...")
    self.check_folder([self._args.annotation_path])
    self.check_no_require_folder([self._args.transcript_path])
    # NOTE(review): the label "--go.obo" differs in style from the
    # attribute name go_obo -- confirm the real option spelling.
    self.check_file(
        [self._args.UniProt_id, self._args.go_obo, self._args.goslim_obo],
        ["--UniProt_id", "--go.obo", "--goslim_obo"], True)
    project_creator.create_subfolders(
        self._paths.required_folders("go_term"))
    args_go = self.args_container.container_goterm(
        self._args.annotation_path, self._paths.goterm_output_folder,
        self._args.UniProt_id, self._args.go_obo,
        self._args.goslim_obo, self._args.transcript_path)
    goterm = GoTermFinding(args_go)
    goterm.run_go_term(args_go)


def srna_target(self):
    """Run sRNA target prediction."""
    print("Running sRNA target prediction...")
    self.check_folder([
        self._args.fasta_path, self._args.sRNA_path,
        self._args.annotation_path
    ])
    project_creator.create_subfolders(
        self._paths.required_folders("srna_target"))
    args_tar = self.args_container.container_srna_target(
        self._args.Vienna_folder, self._args.annotation_path,
        self._args.fasta_path, self._args.sRNA_path,
        self._args.query_sRNA, self._args.program,
        self._args.interaction_length,
        self._args.window_size_target, self._args.span_target,
        self._args.window_size_srna, self._args.span_srna,
        self._args.unstructured_region_RNAplex_target,
        self._args.unstructured_region_RNAplex_srna,
        self._args.unstructured_region_RNAup,
        self._args.energy_threshold, self._args.duplex_distance,
        self._args.top, self._paths.starget_output_folder,
        self._args.process_rnaplex, self._args.process_rnaup,
        self._args.continue_rnaup,
        self._args.potential_target_start,
        self._args.potential_target_end, self._args.target_feature)
    srnatarget = sRNATargetPrediction(args_tar)
    srnatarget.run_srna_target_prediction(args_tar)


def snp(self):
    """Run SNP/mutation calling.

    Exits the program when --bam_type is neither "target" nor
    "reference", or when the ploidy is neither "haploid" nor "diploid".
    """
    print("Running SNP/mutations calling...")
    self.check_folder([self._args.fasta_path])
    if self._args.bam_type not in ("target", "reference"):
        print("Error: please assign \"target\" or"
              " \"reference\" to --bam_type!!")
        sys.exit()
    if self._args.ploidy not in ("haploid", "diploid"):
        print("Error: please assign \"haploid\" or"
              " \"diploid\" to --chromosome_type!!")
        # Previously execution fell through after reporting the error.
        sys.exit()
    project_creator.create_subfolders(self._paths.required_folders("snp"))
    args_snp = self.args_container.container_snp(
        self._args.samtools_path, self._args.bcftools_path,
        self._args.bam_type, self._args.program,
        self._args.fasta_path, self._args.tex_bam_path,
        self._args.frag_bam_path, self._args.quality,
        self._args.read_depth, self._paths.snp_output_folder,
        self._args.indel_fraction, self._args.ploidy)
    snp = SNPCalling(args_snp)
    snp.run_snp_calling(args_snp)


def ppi(self):
    """Retrieve the protein-protein interaction network."""
    print("Running protein-protein interaction networks prediction...")
    self.check_folder([self._args.gff_path])
    self.check_parameter(
        [self._args.proteinID_strains, self._args.species_STRING],
        ["--proteinID_strains", "--species_STRING"])
    project_creator.create_subfolders(
        self._paths.required_folders("ppi_network"))
    args_ppi = self.args_container.container_ppi(
        self._args.gff_path, self._args.proteinID_strains,
        self._args.without_strain_pubmed, self._args.species_STRING,
        self._args.score, self._paths.ppi_output_folder,
        self._args.node_size, self._args.query)
    # Unlike the other runners, PPINetwork is constructed from the
    # output folder rather than from the argument container.
    ppi = PPINetwork(self._paths.ppi_output_folder)
    ppi.retrieve_ppi_network(args_ppi)


def sublocal(self):
    """Run subcellular localization prediction.

    Exits the program when --bacteria_type is neither "positive" nor
    "negative".
    """
    print("Running subcellular localization prediction...")
    self.check_folder([self._args.gff_path, self._args.fasta_path])
    self.check_no_require_folder([self._args.transcript_path])
    if self._args.bacteria_type not in ("positive", "negative"):
        print("Error: please assign \"positive\" or"
              " \"negative\" to --bacteria_type!!")
        sys.exit()
    project_creator.create_subfolders(
        self._paths.required_folders("subcellular_localization"))
    args_sub = self.args_container.container_sublocal(
        self._args.Psortb_path, self._args.gff_path,
        self._args.fasta_path, self._args.bacteria_type,
        self._args.difference_multi, self._args.merge_to_gff,
        self._paths.sublocal_output_folder,
        self._args.transcript_path)
    sublocal = SubLocal(args_sub)
    sublocal.run_sub_local(args_sub)


def ribos(self):
    """Run riboswitch prediction."""
    print("Running riboswitch prediction...")
    self.check_folder([
        self._args.gff_path, self._args.fasta_path,
        self._args.tss_path, self._args.transcript_path
    ])
    self.check_file([self._args.riboswitch_ID, self._args.Rfam],
                    ["--riboswitch_ID", "--Rfam"], True)
    project_creator.create_subfolders(
        self._paths.required_folders("riboswitch"))
    args_ribo = self.args_container.container_ribos(
        self._args.infernal_path, self._args.riboswitch_ID,
        self._args.gff_path, self._args.fasta_path,
        self._args.tss_path, self._args.transcript_path,
        self._args.Rfam, self._paths.ribos_output_folder,
        self._args.e_value, self._args.output_all,
        self._paths.database_folder, self._args.fuzzy,
        self._args.start_codon, self._args.min_dist_rbs,
        self._args.max_dist_rbs, self._args.fuzzy_rbs,
        self._args.UTR_length)
    ribos = Ribos(args_ribo)
    ribos.run_ribos(args_ribo)


def screen(self):
    """Generate screenshots.

    Exits the program when a side gff file does not exist, when no
    output folder is assigned, or when --present is not one of
    "expand", "collapse" or "squish".
    """
    print("Running screenshot generating...")
    self.check_file([self._args.main_gff, self._args.fasta],
                    ["--main_gff", "--fasta"], True)
    if self._args.side_gffs is not None:
        # --side_gffs is a comma-separated list; every entry must exist.
        for gff in self._args.side_gffs.split(","):
            gff = gff.strip()
            if not os.path.isfile(gff):
                print("Error: The --side_gffs no exist!!")
                sys.exit()
    if self._args.output_folder is None:
        print("Error: please assign --output_folder!!")
        sys.exit()
    if self._args.present not in ("expand", "collapse", "squish"):
        print("Error: please assign \"expand\" or "
              "\"collapse\" or \"squish\" to --present!!")
        sys.exit()
    args_sc = self.args_container.container_screen(
        self._args.main_gff, self._args.side_gffs, self._args.fasta,
        self._args.frag_wig_folder, self._args.tex_wig_folder,
        self._args.height, self._args.tex_libs, self._args.frag_libs,
        self._args.present, self._args.output_folder)
    screen = Screen(args_sc)
    screen.screenshot(args_sc)