Example #1
def getspecies(name, colname):
    """
    Get the species protein index from the KEGG database.

    For every column in ``colname`` the ``proindex`` table is queried for the
    rows whose ``NAME`` is one of the given species. A column is kept only
    when none of the returned values is NULL (``'None'`` once stringified).

    :param name: a list of abbreviated species names
    :param colname: a list of ``proindex`` column names (KO ids)
    :return: a tuple ``(relist, match_ko_name)``: ``relist`` holds, for each
        kept column, the list of protein indexes as strings;
        ``match_ko_name`` holds the kept column names in the same order
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, "KEGG_DB_1.0.db")
    relist = []
    match_ko_name = []

    # Bind the species names as parameters instead of splicing them into the
    # SQL text, so a name containing a quote cannot break the statement.
    species = list(name)
    placeholders = ", ".join("?" * len(species))

    conn = sqlite3.connect(db)
    conn.text_factory = str
    try:
        c = conn.cursor()
        for ko in colname:
            # Column names cannot be bound as parameters; ``colname`` must
            # come from a trusted source (the table's own column list).
            query = ("SELECT " + ko + " FROM proindex WHERE NAME IN ("
                     + placeholders + ")")
            c.execute(query, species)
            idslist = [str(row[0]) for row in c.fetchall()]
            # Keep the column only when every selected species has a value.
            if 'None' not in idslist:
                relist.append(idslist)
                match_ko_name.append(ko)
    finally:
        # Always release the database handle, even if a query fails.
        conn.close()
    return relist, match_ko_name
Example #2
def hcp_name(index):
    """
    Look up a highly conserved protein name by its KO id.

    Scans ``protein_ko.txt`` in the local database directory line by line.
    Each line is split on commas; the second field is compared with
    ``index`` and the first field of the first matching line is returned.

    :param index: the KO id to search for
    :return: the protein name, or ``None`` when no line matches
    """
    listing = os.path.join(getlocaldbpath(), "protein_ko.txt")
    with open(listing) as handle:
        for record in handle:
            fields = record.strip().split(',')
            if fields[1] != index:
                continue
            return fields[0]
    return None
Example #3
def getcolname():
    """
    Get the column names of the ``proindex`` table in the KEGG database.

    :return: a list of column names with the first two columns left out
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    conn = sqlite3.connect(db)
    conn.text_factory = str
    try:
        c = conn.cursor()
        c.execute("SELECT * FROM proindex")
        # Each entry of cursor.description is a 7-tuple whose first item is
        # the column name.
        col_name_list = [column[0] for column in c.description]
    finally:
        # Release the database handle instead of leaking it on every call.
        conn.close()
    return col_name_list[2:]
Example #4
def getspecies(spelist, colname):
    """
    Get the species protein index from the KEGG database.

    For every column in ``colname`` the ``proindex`` table is queried for the
    rows whose ``NAME`` is one of the given species. A column is kept unless
    every returned value is NULL (``'None'`` once stringified) or no row is
    returned at all.

    :param spelist: a list of abbreviated species names
    :param colname: a list of ``proindex`` column names (KO ids)
    :return: a tuple ``(relist, match_ko_name)``: ``relist`` holds, for each
        kept column, the list of protein indexes as strings;
        ``match_ko_name`` holds the kept column names in the same order
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    relist = []
    match_ko_name = []

    # Lists of 1000 or more species are queried in batches of 500,
    # presumably to keep each statement within SQLite's size limits.
    if len(spelist) >= 1000:
        # Materialised so the batches can be walked once per column.
        sp = list(splist(spelist, 500))
    else:
        sp = [spelist]

    # Bind the species names as parameters instead of splicing them into the
    # SQL text; the placeholder list is built once per batch, not per column.
    batches = [(list(chunk), ", ".join("?" * len(chunk))) for chunk in sp]

    conn = sqlite3.connect(db)
    conn.text_factory = str
    try:
        c = conn.cursor()
        for ko in colname:
            tem_reslist = []
            for species, placeholders in batches:
                # Column names cannot be bound as parameters; ``colname``
                # must come from a trusted source.
                query = ("SELECT " + ko + " FROM proindex WHERE NAME IN ("
                         + placeholders + ")")
                c.execute(query, species)
                tem_reslist.extend(str(row[0]) for row in c.fetchall())

            tem_none = tem_reslist.count('None')
            # Drop the column when all values are missing (or none came back).
            if tem_none != len(tem_reslist):
                relist.append(tem_reslist)
                match_ko_name.append(ko)
    finally:
        # Always release the database handle, even if a query fails.
        conn.close()
    return relist, match_ko_name
Example #5
"""Change labels from abbreviated names to full names."""

from physpetool.database.dbpath import getlocaldbpath
from physpetool.utils.checkinputfile import checkFile, readIputFile
import os

# Base directory that file names such as "organism.txt" are joined onto;
# resolved once, when this module is imported.
dbpath = getlocaldbpath()

def taxlist():
    """
    Prepare the taxonomy list.

    Reads the tab-separated ``organism.txt`` file in the local database
    directory and collects the second and third field of every line.

    :return: taxonomy list, one ``[field_2, field_3]`` pair per organism
    """
    orgpath = os.path.join(dbpath, "organism.txt")
    organism_list = []
    with open(orgpath) as f:
        for org in f:
            record = org.strip()
            if not record:
                # A blank line carries no organism; skipping it avoids an
                # IndexError on the field access below.
                continue
            each_org = record.split('\t')
            organism_list.append([each_org[1], each_org[2]])
    return organism_list
"""Check that the input file is valid."""


from physpetool.database.dbpath import getlocaldbpath
from physpetool.phylotree.log import getLogging
import os

from physpetool.utils.checkIsNum import is_number

# Base directory that database list file names are joined onto; resolved
# once, when this module is imported.
dbpath = getlocaldbpath()
# Object returned by getLogging for the label 'Checking organisms'
# (presumably a logger - confirm against physpetool.phylotree.log).
logchecking = getLogging('Checking organisms')

def check_organism(input, db_list):
    check input organism
    :param input: a list contain species name
    :param db_list: a list file contain organism in corresponding database
    :return: inputlist: match in database mislist: can't match in database
    originaList = input

    inputlist = []
    mislist = []
    spelist = os.path.join(dbpath, db_list)