def getspecies(name, colname):
    """
    Get species protein index from the DB.

    For every column listed in ``colname`` the ``proindex`` table is queried
    for the rows whose ``NAME`` equals one of the entries of ``name``. A
    column is kept only when none of the values returned for it is NULL.

    :param name: a list containing abbreviated species names
    :param colname: a list containing column names of the ``proindex`` table;
        identifiers cannot be bound as SQL parameters, so they are spliced
        into the statement text and must come from a trusted source
    :return: ``(relist, match_ko_name)`` -- ``relist`` holds, for each kept
        column, the list of its values converted with ``str``;
        ``match_ko_name`` holds the kept column names in the same order
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, "KEGG_DB_1.0.db")
    relist = []
    match_ko_name = []

    # Bind the species names as parameters instead of pasting them into the
    # SQL text, so a quote inside a name can no longer break the statement.
    # An empty list keeps the historic behaviour of matching NAME = ''.
    params = list(name) or ['']
    where = " OR ".join(["NAME = ?"] * len(params))

    conn = sqlite3.connect(db)
    try:
        conn.text_factory = str
        c = conn.cursor()
        for ko in colname:
            query = "SELECT " + ko + " FROM proindex WHERE " + where
            c.execute(query, params)
            idslist = [str(x[0]) for x in c.fetchall()]
            # str(None) == 'None' marks a NULL cell: skip incomplete columns.
            if 'None' not in idslist:
                relist.append(idslist)
                match_ko_name.append(ko)
        c.close()
    finally:
        # Closing the cursor alone leaves the database handle open.
        conn.close()
    return relist, match_ko_name
def hcp_name(index):
    """
    Get a highly conserved protein name from the ko list.

    Scans ``protein_ko.txt`` in the local database directory. Each line is
    split on commas; the first line whose second field equals ``index``
    wins.

    :param index: the value compared against the second comma-separated field
    :return: the first field of the matching line, or ``None`` when no line
        matches
    """
    ko_path = getlocaldbpath()
    pro_ko = os.path.join(ko_path, "protein_ko.txt")
    with open(pro_ko) as ko:
        for line in ko:
            fields = line.strip().split(',')
            # Blank or comma-less lines have no second field; skip them
            # instead of failing with IndexError.
            if len(fields) < 2:
                continue
            if fields[1] == index:
                return fields[0]
    return None
def getcolname():
    """
    Get the DB column names.

    Opens the database file ``KEGGDB`` in the local database directory and
    reads the column names of the ``proindex`` table from the cursor
    description.

    :return: a list with the column names of ``proindex``, without the first
        two columns
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    conn = sqlite3.connect(db)
    try:
        conn.text_factory = str
        c = conn.cursor()
        c.execute("SELECT * FROM proindex")
        # Each description entry is a sequence whose first item is the name.
        col_name_list = [column[0] for column in c.description]
        c.close()
    finally:
        # Closing the cursor alone leaves the database handle open.
        conn.close()
    return col_name_list[2:]
def getspecies(spelist, colname):
    """
    Get species protein index from the DB.

    For every column listed in ``colname`` the ``proindex`` table is queried
    for the rows whose ``NAME`` equals one of the entries of ``spelist``. A
    column is kept unless every value returned for it is NULL (a column
    with no returned rows at all is dropped as well).

    Lists of 1000 or more species are handed to ``splist(spelist, 500)`` and
    queried piece by piece.
    NOTE(review): presumably ``splist`` returns a list of sub-lists of at
    most 500 names -- confirm against its definition.

    :param spelist: a list containing abbreviated species names
    :param colname: a list containing column names of the ``proindex`` table;
        identifiers cannot be bound as SQL parameters, so they are spliced
        into the statement text and must come from a trusted source
    :return: ``(relist, match_ko_name)`` -- ``relist`` holds, for each kept
        column, the list of its values converted with ``str``;
        ``match_ko_name`` holds the kept column names in the same order
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    relist = []
    match_ko_name = []

    if len(spelist) >= 1000:
        sp = splist(spelist, 500)
    else:
        sp = [spelist]

    # The WHERE clause depends only on the chunk, not on the column, so it
    # is prepared once. Names are bound as parameters rather than pasted
    # into the SQL text; an empty chunk keeps the historic NAME = '' match.
    prepared = []
    for line in sp:
        params = list(line) or ['']
        where = " OR ".join(["NAME = ?"] * len(params))
        prepared.append((where, params))

    conn = sqlite3.connect(db)
    try:
        conn.text_factory = str
        c = conn.cursor()
        for ko in colname:
            tem_reslist = []
            for where, params in prepared:
                query = "SELECT " + ko + " FROM proindex WHERE " + where
                c.execute(query, params)
                tem_reslist.extend(str(x[0]) for x in c.fetchall())
            # str(None) == 'None' marks a NULL cell; drop the column only
            # when nothing but NULLs (or nothing at all) came back.
            if tem_reslist.count('None') != len(tem_reslist):
                relist.append(tem_reslist)
                match_ko_name.append(ko)
        c.close()
    finally:
        # Closing the cursor alone leaves the database handle open.
        conn.close()
    return relist, match_ko_name
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more #
# details.                                                                     #
#                                                                              #
# You should have received a copy of the GNU Lesser General Public License     #
# along with Physpe. If not, see <http://www.gnu.org/licenses/>.               #
#                                                                              #
# ##############################################################################

"""
Change labels from abbreviation names to full names.
"""

from physpetool.database.dbpath import getlocaldbpath
from physpetool.utils.checkinputfile import checkFile, readIputFile
import os

dbpath = getlocaldbpath()


def taxlist():
    """
    Build the taxonomy list from ``organism.txt``.

    The file is read from the local database directory; every line is
    stripped and split on tabs.

    :return: a list of two-item lists holding the second and third
        tab-separated fields of each line
    """
    with open(os.path.join(dbpath, "organism.txt")) as handle:
        return [
            [fields[1], fields[2]]
            for fields in (row.strip().split('\t') for row in handle)
        ]
# # # ############################################################################### """ Check input file is right. """ from physpetool.database.dbpath import getlocaldbpath from physpetool.phylotree.log import getLogging import os from physpetool.utils.checkIsNum import is_number dbpath = getlocaldbpath() logchecking = getLogging('Checking organisms') def check_organism(input, db_list): """ check input organism :param input: a list contain species name :param db_list: a list file contain organism in corresponding database :return: inputlist: match in database mislist: can't match in database """ originaList = input inputlist = [] mislist = [] spelist = os.path.join(dbpath, db_list)