def check_srna(input, output):
    """
    Check whether the input species are in the SSU rRNA database.

    The species read from ``input`` are passed to ``check_organism`` together
    with ``kegg_to_silva_id.txt``. The outcome is printed and also written to
    ``PhySpeTree_srna_checked.txt`` inside ``output``.

    :param input: handed to ``checkFile``; the result is read with
        ``readIputFile`` to obtain the species names
    :param output: the directory that receives the check result (created
        when it does not exist)
    """
    if not os.path.exists(output):
        os.makedirs(output)
    report_path = os.path.join(output, "PhySpeTree_srna_checked.txt")

    # Text mode: only ``str`` is written below, which a binary handle rejects
    # on Python 3. The ``with`` block closes the report even if a helper raises.
    with open(report_path, 'w') as report:
        input_path = checkFile(input)
        input_list = readIputFile(input_path)
        _, no_match = check_organism(input_list, "kegg_to_silva_id.txt")

        if len(no_match) == 0:
            print(print_style('INFO: ', fore='green') +
                  "All species are match in SILVA DATABASE")
            report.write("All species are match in SILVA DATABASE")
        else:
            print(print_style('WARNING: ', fore='red') +
                  "The following species are not supported by SILVA DATABASE:")
            report.write(
                "The following species are not supported by SILVA DATABASE:\n")
            for name in no_match:
                print(name)
                report.write(name + "\n")

    print("Checked whether the input species names in SILVA DATABASE completed.")
    print("Checked result is store in {0}".format(report_path))
def annotatingLabels(input, output):
    """
    Change labels: write a ``labels.txt`` annotation file.

    For every name in the input list, each entry of ``taxlist()`` whose first
    element equals that name is written as ``<entry[0]>\\t<entry[1]>``.

    :param input: input a file containing abbreviated names (validated by
        ``checkFile`` and read with ``readIputFile``)
    :param output: output directory path (created when it does not exist)
    :return: None
    """
    if not os.path.exists(output):
        os.makedirs(output)
    labels_path = os.path.join(output, "labels.txt")

    # Text mode: only ``str`` is written, which a binary handle rejects on
    # Python 3. ``with`` guarantees the file is closed if a helper fails.
    with open(labels_path, 'w') as labels:
        labels.write('LABELS\nSEPARATOR TAB\nDATA\n')
        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)
        tax_list = taxlist()
        for name in input_list:
            for entry in tax_list:
                if name == entry[0]:
                    labels.write("{0}\t{1}\n".format(entry[0], entry[1]))

    print("Change abbreviation names to full names complete")
    print("change labels file was save in {0}".format(labels_path))
def check_hcp(input, output):
    """
    Check whether the input species are in the hcp database.

    The species read from ``input`` are passed to ``check_organism`` together
    with ``organism_kegg_to_tax.txt``. The outcome is printed and also written
    to ``PhySpeTree_hcp_checked.txt`` inside ``output``.

    :param input: handed to ``checkFile``; the result is read with
        ``readIputFile`` to obtain the species names
    :param output: the directory that receives the check result (created
        when it does not exist)
    """
    if not os.path.exists(output):
        os.makedirs(output)
    report_path = os.path.join(output, "PhySpeTree_hcp_checked.txt")

    # ``with`` closes the report even when one of the helpers raises.
    with open(report_path, 'w') as report:
        input_path = checkFile(input)
        input_list = readIputFile(input_path)
        _, no_match = check_organism(input_list, "organism_kegg_to_tax.txt")

        if len(no_match) == 0:
            print(print_style('INFO: ', fore='green') +
                  "All species are match in KEGG DATABASE.")
            report.write("All species are match in KEGG DATABASE")
        else:
            print(print_style('WARNING: ', fore='red') +
                  "The following species are not supported by KEGG DATABASE:")
            report.write(
                "The following species are not supported by KEGG DATABASE:\n")
            for name in no_match:
                print(name)
                report.write(name + "\n")

    print("Checked whether the input species names in KEGG DATABASE completed.")
    print("Checked result is store in {0}".format(report_path))
def check_ehcp(input, output):
    """
    Check the highly conserved proteins that will be prepared.

    Each entry of the second value returned by ``getspecies`` is stripped,
    translated with ``hcp_name`` and reported next to a numbered
    ``p<N>.fasta`` file name (numbering starts at 1). Every report line is
    printed and written to ``PhySpeTree_echp_extend.txt`` inside ``output``.

    :param input: the species abbreviated names (validated by ``checkFile``
        and read with ``readIputFile``)
    :param output: a directory that receives the check result (created when
        it does not exist)
    """
    # prepare the file that receives the result
    if not os.path.exists(output):
        os.makedirs(output)
    report_path = os.path.join(output, "PhySpeTree_echp_extend.txt")

    # Text mode: only ``str`` is written, which a binary handle rejects on
    # Python 3. ``with`` closes the file even if a helper raises.
    with open(report_path, 'w') as report:
        # check input names
        input_path = checkFile(input)
        input_list = readIputFile(input_path)
        colname = getcolname()
        _, match_ko = getspecies(input_list, colname)

        for index, ko in enumerate(match_ko, 1):
            hcpname = hcp_name(ko.strip())
            message = "'{0}' ----------------------------------> p{1}.fasta\n".format(
                hcpname, index)
            print(message)
            report.write(message)

    print("Checked extend highly conserved proteins is completed.")
    print("Checked result was store in {0}".format(report_path))
def check_ehcp(input, output):
    """
    Check the highly conserved proteins that will be prepared.

    Each entry of the second value returned by ``getspecies`` is stripped,
    translated with ``hcp_name`` and reported next to a numbered
    ``p<N>.fasta`` file name (numbering starts at 1). Every report line is
    printed and written to ``PhySpeTree_echp_extend.txt`` inside ``output``.

    :param input: the species abbreviated names (validated by ``checkFile``
        and read with ``readIputFile``)
    :param output: a directory that receives the check result (created when
        it does not exist)
    """
    # prepare the file that receives the result
    if not os.path.exists(output):
        os.makedirs(output)
    report_path = os.path.join(output, "PhySpeTree_echp_extend.txt")

    # ``with`` closes the report even when one of the helpers raises.
    with open(report_path, 'w') as report:
        # check input names
        input_path = checkFile(input)
        input_list = readIputFile(input_path)
        colname = getcolname()
        _, match_ko = getspecies(input_list, colname)

        for index, ko in enumerate(match_ko, 1):
            hcpname = hcp_name(ko.strip())
            message = "'{0}' ----------------------------------> p{1}.fasta\n".format(
                hcpname, index)
            print(message)
            report.write(message)

    print("Checked extend highly conserved proteins is completed.")
    print("Checked result was store in {0}".format(report_path))
def starting_esrna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                   args_clustalw_p, args_gblocks, args_raxml, args_raxml_p,
                   args_fasttree, args_fasttree_p, args_thread,
                   args_extenddata):
    """
    Reconstruct a phylogenetic tree by the SSU rRNA extend method.

    Steps: retrieve the sequences with ``retrieve16srna``, append the content
    of the extension file to ``rna_sequence.fasta``, align, run Gblocks,
    convert with ``fasta2phy`` and build the tree.

    :param in_put: species input, checked by ``checkSilvaOrganism``
    :param out_put: output directory handed to the pipeline helpers
    :param args_muscle: truthy to align with ``domuscle``
    :param args_muscle_p: parameters passed to ``domuscle``
    :param args_clustalw: truthy to align with ``doclustalw`` (tested first)
    :param args_clustalw_p: parameters passed to ``doclustalw``
    :param args_gblocks: parameters passed to ``dogblocks``
    :param args_raxml: truthy to build the tree with ``doraxml``
    :param args_raxml_p: parameters passed to ``doraxml``
    :param args_fasttree: truthy to build the tree with ``doFastTree``
        (tested before ``args_raxml``)
    :param args_fasttree_p: parameters passed to ``doFastTree``
    :param args_thread: thread setting passed to the tree builders
    :param args_extenddata: file whose lines are appended to the retrieved
        sequences (validated by ``checkFile``)
    """
    extend_check = checkFile(args_extenddata)
    ssu_input = checkSilvaOrganism(in_put)
    out_retrieve = retrieve16srna(ssu_input, out_put)
    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')

    # Append in text mode: the extension file is read as text, and writing
    # ``str`` lines to a binary handle raises TypeError on Python 3. ``with``
    # closes both files even when the copy fails.
    with open(retrieve_srna_path, 'a') as merged, open(extend_check) as extra:
        for line in extra:
            merged.write(line)

    # set default aligned by muscle if not specify clustalw
    # NOTE(review): out_alg stays unassigned when both flags are falsy.
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_muscle:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # gblocks: an untouched protein default is replaced by the DNA default
    if args_gblocks is gblockspara_pro:
        args_gblocks = gblockspara_dna
    out_gblock = dogblocks(out_alg, args_gblocks)
    out_f2p = fasta2phy(out_gblock)

    # reconstruct tree
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_raxml:
        # same default swap as for gblocks, for the RAxML parameters
        if args_raxml_p is raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
def colorLabel(input, output, taxon):
    """
    Main function to color labels.

    Writes ``labels_color_by_<taxon>.txt`` into ``output``: first
    ``cite_label``, then a ``TREE_COLORS`` header, then one
    ``<name>\\tlabel\\t<color>`` row per entry returned by ``matchInput``.

    :param input: input file name (validated by ``checkFile`` and read with
        ``readIputFile``)
    :param output: output directory path (created when it does not exist)
    :param taxon: choice annotation by ['kingdom', 'phylum', 'class', 'order']
    :return: None
    """
    if not os.path.exists(output):
        os.makedirs(output)

    # annotation file name
    annotation_path = os.path.join(output, "labels_color_by_" + taxon + ".txt")

    # ``with`` closes the file even if a helper raises or a color is missing.
    with open(annotation_path, 'w') as annotation:
        # write annotation head
        annotation.write(cite_label)
        annotation.write('\n')
        annotation.write('TREE_COLORS\nSEPARATOR TAB\nDATA\n')

        # check and get input list
        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)

        # get match list and the annotation dict used to look up colors
        match_list, anno_dict = matchInput(input_list, taxon)

        # each entry is indexed as (name, key); the key selects the color
        for entry in match_list:
            color = anno_dict[entry[1]]
            annotation.write("{0}\tlabel\t{1}\n".format(entry[0], color))

    print("Color labels by {0} was complete.".format(taxon))
    print("Color labels annotation was save in {0}".format(annotation_path))
def annotatingLabels(input, output):
    """
    Change labels: write a ``labels.txt`` annotation file.

    The file starts with ``cite`` and a ``LABELS`` header. The lookup table is
    ``taxlistid()`` when ``is_number`` accepts the first input name, otherwise
    ``taxlist()``. Every table entry whose first element equals an input name
    is written as ``<entry[0]>\\t<entry[1]>``.

    :param input: input a file containing abbreviated names (validated by
        ``checkFile`` and read with ``readIputFile``)
    :param output: output directory path (created when it does not exist)
    :return: None
    """
    if not os.path.exists(output):
        os.makedirs(output)
    labels_path = os.path.join(output, "labels.txt")

    # ``with`` closes the file even when one of the helpers raises.
    with open(labels_path, 'w') as labels:
        labels.write(cite)
        labels.write('\n')
        labels.write('LABELS\nSEPARATOR TAB\nDATA\n')

        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)

        # Guard the peek at the first name so an empty input yields a
        # header-only file instead of an IndexError.
        if input_list and is_number(input_list[0]):
            tax_list = taxlistid()
        else:
            tax_list = taxlist()

        for name in input_list:
            for entry in tax_list:
                if name == entry[0]:
                    labels.write("{0}\t{1}\n".format(entry[0], entry[1]))

    print("Change abbreviation names to full names complete")
    print("change labels file was save in {0}".format(labels_path))
def colorRange(input, output, taxon):
    """
    Main function to color ranges.

    Writes ``range_color_by_<taxon>.txt`` into ``output``: a ``TREE_COLORS``
    header followed by one ``<name>\\trange\\t<color>\\t<key>`` row per entry
    returned by ``matchInput``.

    :param input: input file name (validated by ``checkFile`` and read with
        ``readIputFile``)
    :param output: output directory path (created when it does not exist)
    :param taxon: choice annotation by ['kingdom', 'phylum', 'class', 'order']
    :return: None
    """
    if not os.path.exists(output):
        os.makedirs(output)

    # annotation file name
    annotation_path = os.path.join(output, "range_color_by_" + taxon + ".txt")

    # Text mode: only ``str`` is written, which a binary handle rejects on
    # Python 3. ``with`` closes the file even if a helper raises.
    with open(annotation_path, 'w') as annotation:
        # write annotation range head
        annotation.write('TREE_COLORS\nSEPARATOR TAB\nDATA\n')

        # check and get input list
        inputfile = checkFile(input)
        input_list = readIputFile(inputfile)

        # get match list and the annotation dict used to look up colors
        match_list, anno_dict = matchInput(input_list, taxon)

        # each entry is indexed as (name, key); the key selects the color
        for entry in match_list:
            color = anno_dict[entry[1]]
            annotation.write(
                "{0}\trange\t{1}\t{2}\n".format(entry[0], color, entry[1]))

    print("Color range by {0} was complete.".format(taxon))
    print("Color range annotation was save in {0}".format(annotation_path))
def starting_esrna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                   args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
                   args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
                   args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
                   args_iqtree_p, args_thread, args_extenddata, args_db):
    """
    Reconstruct a phylogenetic tree by the SSU rRNA extend method.

    Steps: retrieve the sequences with ``retrieve16srna``, optionally run
    ``recovery_silva``, append the content of the extension file to
    ``rna_sequence.fasta``, align, trim and build the tree. The retrieval
    time and the align-to-tree time are logged through ``auto_build_log``.

    :param in_put: species input, checked by ``checkSilvaOrganism``
    :param out_put: output directory handed to the pipeline helpers
    :param args_muscle: truthy to align with ``domuscle`` (tested last)
    :param args_muscle_p: parameters passed to ``domuscle``
    :param args_clustalw: truthy to align with ``doclustalw`` (tested first)
    :param args_clustalw_p: parameters passed to ``doclustalw``
    :param args_mafft: truthy to align with ``domafft`` (tested second)
    :param args_mafft_p: parameters passed to ``domafft``
    :param args_gblocks: truthy to trim with ``dogblocks`` + ``fasta2phy``
    :param args_gblocks_p: parameters passed to ``dogblocks``
    :param args_trimal: truthy to trim with ``dotrimal`` (tested first)
    :param args_trimal_p: parameters passed to ``dotrimal``
    :param args_raxml: truthy to build the tree with ``doraxml`` (tested last)
    :param args_raxml_p: parameters passed to ``doraxml``
    :param args_fasttree: truthy to build the tree with ``doFastTree``
        (tested first)
    :param args_fasttree_p: parameters passed to ``doFastTree``
    :param args_iqtree: truthy to build the tree with ``doiqtree``
        (tested second)
    :param args_iqtree_p: parameters passed to ``doiqtree``
    :param args_thread: thread setting passed to the tree builders
    :param args_extenddata: file whose lines are appended to the retrieved
        sequences (validated by ``checkFile``)
    :param args_db: database argument forwarded to ``retrieve16srna``
    """
    extend_check = checkFile(args_extenddata)
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)

    start = time.time()
    out_retrieve = retrieve16srna(ssu_input, out_put, args_db)
    end = time.time()
    auto_build_log.info(
        'Retrieving SSU rRNA sequences used time: {} Seconds'.format(end - start))

    if recovery_dic != []:
        recovery_silva(out_retrieve, recovery_dic, ssu_input)

    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')
    # ``with`` closes both files even when opening or copying the extension
    # data fails; previously the append handle leaked in that case.
    with open(retrieve_srna_path, 'a') as merged, open(extend_check) as extra:
        for line in extra:
            merged.write(line)

    # set default aligned by muscle if not specify clustalw
    # NOTE(review): out_alg stays unassigned when all three flags are falsy.
    start2 = time.time()
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    elif args_muscle:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # set default trim by gblocks if not specify trimal
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    elif args_gblocks:
        # an untouched protein default is replaced by the DNA default
        if args_gblocks_p is gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # reconstruct tree
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # same default swap as for gblocks, for the RAxML parameters
        if args_raxml_p is raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end2 = time.time()
    auto_build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end2 - start2))
def starting_esrna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                   args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
                   args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
                   args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
                   args_iqtree_p, args_thread, args_extenddata):
    """
    Reconstruct a phylogenetic tree by the SSU rRNA extend method.

    Steps: retrieve the sequences with ``retrieve16srna``, optionally run
    ``recovery``, append the content of the extension file to
    ``rna_sequence.fasta``, align, trim and build the tree.

    :param in_put: species input, checked by ``checkSilvaOrganism``
    :param out_put: output directory handed to the pipeline helpers
    :param args_muscle: truthy to align with ``domuscle`` (tested last)
    :param args_muscle_p: parameters passed to ``domuscle``
    :param args_clustalw: truthy to align with ``doclustalw`` (tested first)
    :param args_clustalw_p: parameters passed to ``doclustalw``
    :param args_mafft: truthy to align with ``domafft`` (tested second)
    :param args_mafft_p: parameters passed to ``domafft``
    :param args_gblocks: truthy to trim with ``dogblocks`` + ``fasta2phy``
    :param args_gblocks_p: parameters passed to ``dogblocks``
    :param args_trimal: truthy to trim with ``dotrimal`` (tested first)
    :param args_trimal_p: parameters passed to ``dotrimal``
    :param args_raxml: truthy to build the tree with ``doraxml`` (tested last)
    :param args_raxml_p: parameters passed to ``doraxml``
    :param args_fasttree: truthy to build the tree with ``doFastTree``
        (tested first)
    :param args_fasttree_p: parameters passed to ``doFastTree``
    :param args_iqtree: truthy to build the tree with ``doiqtree``
        (tested second)
    :param args_iqtree_p: parameters passed to ``doiqtree``
    :param args_thread: thread setting passed to the tree builders
    :param args_extenddata: file whose lines are appended to the retrieved
        sequences (validated by ``checkFile``)
    """
    extend_check = checkFile(args_extenddata)
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)
    out_retrieve = retrieve16srna(ssu_input, out_put)
    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)

    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')
    # Append in text mode: the extension file is read as text, and writing
    # ``str`` lines to a binary handle raises TypeError on Python 3. ``with``
    # closes both files even when the copy fails.
    with open(retrieve_srna_path, 'a') as merged, open(extend_check) as extra:
        for line in extra:
            merged.write(line)

    # set default aligned by muscle if not specify clustalw
    # NOTE(review): out_alg stays unassigned when all three flags are falsy.
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    elif args_muscle:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # set default trim by gblocks if not specify trimal
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    elif args_gblocks:
        # an untouched protein default is replaced by the DNA default
        if args_gblocks_p is gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # reconstruct tree
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # same default swap as for gblocks, for the RAxML parameters
        if args_raxml_p is raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)