# # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ function to call FastTree and FastTree reconstruct tree. """ import os import subprocess from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logdofasttree = getLogging('FastTree') def doFastTree(inputfile, outputfile, FastTreepara, thread): # Use FASTA format build tree input_fasta = inputfile.replace('.phy', '') FastTreePath = getlocalpath() thread_to_str = str(thread) out_tree_name = os.path.join(outputfile, "FastTree.tree") if not os.path.exists(outputfile): os.mkdir(outputfile) if thread_to_str is '1': cmd = FastTreePath + "/FastTree " + FastTreepara + input_fasta + " >" + out_tree_name subprocess.call(cmd, shell=True) else: # set the threads
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Module to call muscle to do alignment """ import subprocess import os import os.path import time from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logdomuscle = getLogging('muscle') # muscle -in process_L1.txt -out process_L1.afa -maxiters 100 def domuscle(indata, outdata, musclepara): """ call muscle software to do align :param indata: a director contain a fasta format file or a fasta format file :param outdata: the out is abs path with a file name :return: outdata path """ muscleparas = musclepara.lstrip() mupath = getlocalpath() out_path = os.path.dirname(outdata) timeformat = '%Y%m%d%H%M%S' timeinfo = str(time.strftime(timeformat))
The module retrieve highly conserved proteins and download from KEGG database """ import shutil import glob import ftplib import os import sqlite3 import time from physpetool.database.dbpath import getlocaldbpath from physpetool.phylotree.log import getLogging from physpetool.tools.keggapi import getprotein logretrieveprotein = getLogging('KEGG INDEX DB') KEGGDB = "KEGG_DB_3.0.db" def getspecies(spelist, colname): """ Get species protein index for DB :param name: a list contain abbreviation species nam :param colname: a list contain colname of DB :return: a list contain protein index can be retrieved and a match ko list (is a ko id list) """ dbpath = getlocaldbpath() db = os.path.join(dbpath, KEGGDB) relist = [] match_ko_name = [] conn = sqlite3.connect(db)
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more # # details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Convert FASTA(.fasta) format to PHYLIP(.phy) format """ from physpetool.phylotree.log import getLogging logfasta2phy = getLogging('convert') # function to read fasta file def read_fasta(fp): """ read *.fasta file and parse :param fp: a fasta format file """ seq_name, seq_pro = None, [] for line in fp: line = line.rstrip() if line.startswith(">"): if seq_name: yield (seq_name, ''.join(seq_pro)) seq_name, seq_pro = line.replace(">", ""), [] else:
# # # ############################################################################### """ Module to call clustalw2 to do alignment """ # clustalw2 -INFILE=p1.fasta -TYPE=PROTEIN -OUTPUT=FASTA -ALIGN -OUTFILE=test/clustalw_p2.fasta import os import subprocess from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath from physpetool.utils.outdirforamt import timeformat logdoclustalw = getLogging('clutalw2') def doclustalw(indata, outdata, clustalwpara): """ Call clustalw software to do align :param indata: a director contain a fasta format file or a fasta format file :param outdata: the out is abs path with a file name :return: outdata path """ logdoclustalw.debug("clustalw input data:{0}".format(indata)) type = "-TYPE=DNA" if clustalwpara is None: clustalwparas = type else: clustalwparas = type + " " + clustalwpara.lstrip()
#                                                                             #
# You should have received a copy of the GNU Lesser General Public License    #
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>.          #
#                                                                             #
###############################################################################
import os
import time
from physpetool.phylotree.log import getLogging

""" Concatenate the muscle alignment files to one file. """

logconcat = getLogging('concatenate')


def read_fasta_co(fp):
    """Lazily parse FASTA-formatted text into (header, sequence) pairs.

    :param fp: an iterable of text lines in FASTA format, e.g. an open file
    :return: a generator of ``(header, sequence)`` tuples; the header keeps
             its leading ``>`` and the sequence is the record's lines joined
             together with all space characters removed
    """
    header = None
    pieces = []
    for raw in fp:
        text = raw.rstrip()
        if not text.startswith(">"):
            # Sequence data: collect it with embedded spaces stripped out.
            # Anything collected before the first header is thrown away when
            # that header resets the buffer below.
            pieces.append(text.replace(" ", ""))
            continue
        # A new header closes the record that was being accumulated, if any.
        if header:
            yield (header, ''.join(pieces))
        header = text
        pieces = []
    # Flush the final record; nothing is produced when no header was seen.
    if header:
        yield (header, ''.join(pieces))
# details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ function to call RAxML construct tree """ import os import subprocess from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logdoraxml = getLogging('RAxML') def doraxml(inputfile, outputfile, raxmlpara, thread): """ call RAxML method to construct species tree :param inputfile: abs path of .phy format files :param outputfile: a file contain RAxML result """ raxmlparas = raxmlpara.lstrip() raxmlparalist = raxmlparas.split(" ") tpara = '-T' if tpara in raxmlparalist: index = raxmlparalist.index(tpara) raxmlparalist.remove(raxmlparalist[index])
# details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ The module retrieve and download SSU rRNA sequence from SILVA Database """ import ftplib import os import time from physpetool.phylotree.log import getLogging log_retrieve = getLogging('SSU rRNA DB') def retrieve16srna(spenamelist, outpath): """ retrieve 16s rna form bioinfor.scu.edu.cn :param spenamelist: a list contain species names :param outpath: output data path :return: download file path """ # pares and cheak species names lsit # spelist = [] # for line in spenamelist: # st = line.strip() # spelist.append(st) spelist = spenamelist
# # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ function to call iqtree and iqtree reconstruct tree. """ import os import subprocess from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logdoiqtree = getLogging('iqtree') def doiqtree(inputfile, outputfile, iqtreepara, thread): # Use FASTA format build tree # input_fasta = inputfile.replace('.phy', '') iqtreePath = getlocalpath() thread_to_str = str(thread) out_tree_name = os.path.join(outputfile, "iqtree.tree") if not os.path.exists(outputfile): os.mkdir(outputfile) if thread_to_str is '1': cmd = iqtreePath + "/iqtree " + "-s " + inputfile + " -pre " + out_tree_name + iqtreepara subprocess.call(cmd, shell=True) else: # set the threads
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Module to call muscle to do alignment """ import multiprocessing import subprocess import os import os.path import time from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logdomuscle = getLogging('muscle') # muscle -in process_L1.txt -out process_L1.afa -maxiters 100 def domuscle(indata, outdata, musclepara): """ call muscle software to do align :param indata: a director contain a fasta format file or a fasta format file :param outdata: the out is abs path with a file name :return: outdata path """ muscleparas = musclepara.lstrip() mupath = getlocalpath() out_path = os.path.dirname(outdata) timeformat = '%Y%m%d%H%M%S' timeinfo = str(time.strftime(timeformat))
# # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Check input file is right. """ from physpetool.database.dbpath import getlocaldbpath from physpetool.phylotree.log import getLogging import os dbpath = getlocaldbpath() logchecking = getLogging('Checking organisms') def check_organism(input, db_list): """ check input organism :param input: a list contain species name :param db_list: a list file contain organism in corresponding database :return: inputlist: match in database mislist: can't match in database """ originaList = [] for line in input: st = line.strip() originaList.append(st) originaList = removeEmptyStr(originaList) inputlist = []
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more # # details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### import os import subprocess import re from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath loggtrimal = getLogging('trimal') # trimal -in concatenate.fasta -out output2 -gt 1 -phylip3.2 def dotrimal(indata, trimalpara): """ do trimal after muslce and concatenate :param indata: a fasta file input to do trimal :param outdata: append name after :return: a file path of trimal result """ # Deal with outdata name trimalparas = trimalpara.lstrip() trimalpath = getlocalpath() out_path = os.path.dirname(indata)
# Manual check of the project logger: request a logger named 'wwww' from
# getLogging and send one message through each of error(), info() and debug().
# NOTE(review): which of these messages actually appear depends on how
# getLogging configures the returned logger -- not visible here, confirm in
# physpetool.phylotree.log.
from physpetool.phylotree.log import getLogging

testlog = getLogging('wwww')

# One call per severity method, in the order error -> info -> debug.
testlog.error("~~~~~~~~~~~~~~test1.")
testlog.info("~~~~~~~~~~~~~~test2.")
testlog.debug("~~~~~~~~~~~~~~~~~~~~~~~~~test3.")
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Module to call clustalw2 to do alignment """ # clustalw2 -INFILE=p1.fasta -TYPE=PROTEIN -OUTPUT=FASTA -ALIGN -OUTFILE=test/clustalw_p2.fasta import os import subprocess from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath from physpetool.utils.outdirforamt import timeformat logdoclustalw = getLogging('clutalw2') def doclustalw(indata, outdata, clustalwpara): """ Call clustalw software to do align :param indata: a director contain a fasta format file or a fasta format file :param outdata: the out is abs path with a file name :return: outdata path """ logdoclustalw.debug("clustalw input data:{0}".format(indata)) type = "-TYPE=DNA" if clustalwpara is None: clustalwparas = type else: clustalwparas = type + " " + clustalwpara.lstrip()
# ############################################################################### """ Check input file is right. """ from physpetool.database.dbpath import getlocaldbpath from physpetool.phylotree.log import getLogging import os from physpetool.utils.checkIsNum import is_number dbpath = getlocaldbpath() logchecking = getLogging('Checking organisms') def check_organism(input, db_list): """ check input organism :param input: a list contain species name :param db_list: a list file contain organism in corresponding database :return: inputlist: match in database mislist: can't match in database """ originaList = input inputlist = [] mislist = [] spelist = os.path.join(dbpath, db_list) for line in originaList:
# # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ function to call FastTree and FastTree reconstruct tree. """ import os import subprocess from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logdofasttree = getLogging('FastTree') def doFastTree(inputfile, outputfile, FastTreepara, thread): # Use FASTA format build tree input_fasta = inputfile.replace('.phy', '') FastTreePath = getlocalpath() thread_to_str = str(thread) out_tree_name = os.path.join(outputfile, "FastTree.tree") if not os.path.exists(outputfile): os.mkdir(outputfile) if thread_to_str is '1': cmd = FastTreePath + "/FastTree " + FastTreepara + " " + input_fasta + " >" + out_tree_name subprocess.call(cmd, shell=True) else: # set the threads
# along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Module to call mafft to do alignment """ import subprocess import os import os.path import time from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logmafft = getLogging('mafft') # mafft --auto p1.fasta > output def domafft(indata, outdata, mafftparas): """ call mafft software to do align :param indata: a director contain a fasta format file or a fasta format file :param outdata: the out is abs path with a file name :return: outdata path """ mafftparas = mafftparas.lstrip() mapath = getlocalpath() out_path = os.path.dirname(outdata) timeformat = '%Y%m%d%H%M%S' timeinfo = str(time.strftime(timeformat))
from physpetool.utils.checkinputfile import checkKeggOrganism, checkSilvaOrganism, checkFile, recovery, recovery_silva import argparse import os APP_DESC = "Reconstruct" raxmlpara_pro = "-f a -m PROTGAMMAJTTX -p 12345 -x 12345 -# 100 -n T1" raxmlpara_dna = "-f a -m GTRGAMMA -p 12345 -x 12345 -# 100 -n T1" musclepara = '-maxiters 100' gblockspara_pro = '-t=p -b5=h -e=-gb1' gblockspara_dna = '-t=d -b5=h -e=-gb1' clustalwpara = None trimalpara = "-gt 1" mafftpara = "--auto" auto_build_log = getLogging('Used time') def start_args(input): """ Argument parse :param input: arguments """ autobuild_args = input.add_argument_group("AUTOBUILD OPTIONS") advance_args = input.add_argument_group("ADVANCE OPTIONS") autobuild_args.add_argument( '-i', nargs='?', dest='spenames', type=argparse.FileType('r'), help=
# PhySpeTree is distributed in the hope that it will be useful, but WITHOUT ANY # # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more # # details. # # # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Convert FASTA(.fasta) format to PHYLIP(.phy) format """ from physpetool.phylotree.log import getLogging logfasta2phy = getLogging('convert') # function to read fasta file def read_fasta(fp): """ read *.fasta file and parse :param fp: a fasta format file """ seq_name, seq_pro = None, [] for line in fp: line = line.rstrip() if line.startswith(">"): if seq_name: yield (seq_name, ''.join(seq_pro)) seq_name, seq_pro = line.replace(">", ""), [] else:
# You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Module to call mafft to do alignment """ import multiprocessing import subprocess import os import os.path import time from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath logmafft = getLogging('mafft') # mafft --auto p1.fasta > output def domafft(indata, outdata, mafftparas): """ call mafft software to do align :param indata: a director contain a fasta format file or a fasta format file :param outdata: the out is abs path with a file name :return: outdata path """ mafftparas = mafftparas.lstrip() mapath = getlocalpath() out_path = os.path.dirname(outdata) timeformat = '%Y%m%d%H%M%S' timeinfo = str(time.strftime(timeformat))
# # # You should have received a copy of the GNU Lesser General Public License # # along with PhySpeTree. If not, see <http://www.gnu.org/licenses/>. # # # # ############################################################################### """ Gblocks module: do gblocks before aligned sequences. """ import os import subprocess import re from physpetool.phylotree.log import getLogging from physpetool.softwares.path import getlocalpath loggblocks = getLogging('Gblocks') # Gblocks protein_alignment.fasta -t=p -e=-gb1 -b4=5 -d=y def dogblocks(indata, gblockpara): """ do gblocks after muslce and concatenate :param indata: a fasta file input after gblock :param gblockpara: the gblocks para :return: a file path of gblocks result """ # Deal with outdata name gblockparas = gblockpara.lstrip() gblockparalist = gblockparas.split(" ") regex = '-e='