Beispiel #1
0
def getspecies(name, colname):
    """
    Get species protein index from the DB.

    :param name: a list of abbreviated species names, matched against the NAME column of proindex
    :param colname: a list of proindex column names to query (must come from a trusted source,
        they are inserted into the SQL text as identifiers)
    :return: a tuple (relist, match_ko_name); relist contains, for every retained column, the list
        of stringified values of the selected rows, match_ko_name contains the retained column names
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, "KEGG_DB_1.0.db")
    relist = []
    match_ko_name = []
    # An empty name list used to build "WHERE NAME = ''"; keep matching the empty name in that case.
    names = list(name) or [""]
    placeholders = ", ".join(["?"] * len(names))
    conn = sqlite3.connect(db)
    try:
        conn.text_factory = str
        c = conn.cursor()
        for ko in colname:
            # Identifiers cannot be bound as parameters, so only the species names are bound;
            # binding keeps names containing quotes from breaking (or altering) the statement.
            query = "SELECT " + ko + " FROM proindex WHERE NAME IN (" + placeholders + ")"
            c.execute(query, names)
            idslist = [str(row[0]) for row in c.fetchall()]
            # A column is kept only when none of the selected rows yields 'None' for it.
            if 'None' not in idslist:
                relist.append(idslist)
                match_ko_name.append(ko)
        c.close()
    finally:
        # Closing the cursor alone leaves the database handle open; release it on every path.
        conn.close()
    return relist, match_ko_name
def hcp_name(index):
    """
    Get a highly conserved protein name from the ko list file.

    Scans "protein_ko.txt" in the local DB path; each line is split on commas.

    :param index: value compared with the second comma-separated field of each line
    :return: the first field of the first matching line, or None when no line matches
    """
    pro_ko = os.path.join(getlocaldbpath(), "protein_ko.txt")
    with open(pro_ko) as ko:
        for line in ko:
            fields = line.strip().split(',')
            # Blank or malformed lines have no second field; skip them instead of
            # raising IndexError in the middle of the scan.
            if len(fields) > 1 and fields[1] == index:
                return fields[0]
    return None
def getcolname():
    """
    Get the DB column names.

    :return: the column names of the proindex table, without the first two columns
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    conn = sqlite3.connect(db)
    try:
        conn.text_factory = str
        c = conn.cursor()
        c.execute("SELECT * FROM proindex")
        # Each entry of cursor.description is a sequence whose first item is the column name.
        col_name_list = [column[0] for column in c.description]
        c.close()
    finally:
        # Closing the cursor alone leaves the database handle open; release it on every path.
        conn.close()
    return col_name_list[2:]
def getspecies(spelist, colname):
    """
    Get species protein index from the DB.

    :param spelist: a list of abbreviated species names, matched against the NAME column of proindex
    :param colname: a list of proindex column names to query (must come from a trusted source,
        they are inserted into the SQL text as identifiers)
    :return: a tuple (relist, match_ko_name); relist contains, for every retained column, the list
        of stringified values of the selected rows, match_ko_name contains the retained column names
    """
    dbpath = getlocaldbpath()
    db = os.path.join(dbpath, KEGGDB)
    relist = []
    match_ko_name = []
    # Long species lists are queried in batches of 500 names.
    if len(spelist) >= 1000:
        sp = splist(spelist, 500)
    else:
        sp = [spelist]

    # The placeholders and bound values of a batch do not depend on the column, so build them once.
    batches = []
    for line in sp:
        # An empty batch used to build "WHERE NAME = ''"; keep matching the empty name in that case.
        params = list(line) or [""]
        batches.append((", ".join(["?"] * len(params)), params))

    conn = sqlite3.connect(db)
    try:
        conn.text_factory = str
        c = conn.cursor()
        for ko in colname:
            tem_reslist = []
            for placeholders, params in batches:
                # Identifiers cannot be bound as parameters, so only the species names are bound;
                # binding keeps names containing quotes from breaking (or altering) the statement.
                query = "SELECT " + ko + " FROM proindex WHERE NAME IN (" + placeholders + ")"
                c.execute(query, params)
                tem_reslist.extend(str(row[0]) for row in c.fetchall())

            # Drop the column when every retrieved value is 'None' (or nothing was retrieved).
            if tem_reslist.count('None') != len(tem_reslist):
                relist.append(tem_reslist)
                match_ko_name.append(ko)

        c.close()
    finally:
        # Closing the cursor alone leaves the database handle open; release it on every path.
        conn.close()
    return relist, match_ko_name
Beispiel #5
0
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more #
# details.                                                                     #
#                                                                              #
# You should have received a copy of the GNU Lesser General Public License     #
# along with Physpe. If not, see <http://www.gnu.org/licenses/>.               #
#                                                                              #
# ##############################################################################
"""
Change labels from abbreviation names to full names.
"""

from physpetool.database.dbpath import getlocaldbpath
from physpetool.utils.checkinputfile import checkFile, readIputFile
import os

# Evaluated once at import time; taxlist() joins "organism.txt" onto this path
# (presumably the local database directory -- confirm against getlocaldbpath).
dbpath = getlocaldbpath()


def taxlist():
    """
    Build the taxonomy list from "organism.txt".

    Every line of the file is stripped and split on tabs.

    :return: a list with one [second field, third field] pair per line
    """
    organism_file = os.path.join(dbpath, "organism.txt")
    with open(organism_file) as handle:
        records = (entry.strip().split('\t') for entry in handle)
        # Consume the generator while the file is still open.
        return [[fields[1], fields[2]] for fields in records]
#                                                                               #
# ###############################################################################


"""
Check that the input file is correct.

"""

from physpetool.database.dbpath import getlocaldbpath
from physpetool.phylotree.log import getLogging
import os

from physpetool.utils.checkIsNum import is_number

# Evaluated once at import time; check_organism() joins file names onto this path
# (presumably the local database directory -- confirm against getlocaldbpath).
dbpath = getlocaldbpath()
# Module-level logger obtained from getLogging with the label 'Checking organisms'.
logchecking = getLogging('Checking organisms')


def check_organism(input, db_list):
    """
    check input organism
    :param input: a list contain species name
    :param db_list: a list file contain organism in corresponding database
    :return: inputlist: match in database mislist: can't match in database
    """
    originaList = input

    inputlist = []
    mislist = []
    spelist = os.path.join(dbpath, db_list)