Exemple #1
0
def print_all_proteins():
    """
    Print all the proteins to stdout.

    Connects to the database named by the module-level ``phagedb``, passes
    the connection to ``protein_to_fasta`` and disconnects afterwards.

    :return:  nothing
    """

    con = connect_to_db(phagedb)
    try:
        protein_to_fasta(con)
    finally:
        # always release the connection, even if writing the proteins fails
        disconnect(con)
Exemple #2
0
def list_all_genomes():
    """
    Print the description of every genome to stdout, one per line.

    Connects to the database named by the module-level ``phagedb`` and
    selects the ``description`` column of the ``genome`` table.

    :return:  nothing
    """

    con = connect_to_db(phagedb)
    try:
        cursor = con.cursor().execute("select description from genome")
        for row in cursor.fetchall():
            # each row is a one-column tuple holding the description
            print(f"{row[0]}")
    finally:
        # always release the connection, even if the query or printing fails
        disconnect(con)
    Create a full text search virtual table on the protein products
    :param conn: the database connection
    :param verbose: more output
    :return: 
    """

    if verbose:
        sys.stderr.write(
            f"{color.GREEN}Adding full text search capabilities{color.ENDC}\n")

    c = conn.cursor()
    c.execute(
        "CREATE VIRTUAL TABLE protein_fts using FTS5(protein_rowid, product);")
    c.execute(
        "INSERT INTO protein_fts SELECT protein_rowid, product FROM protein;")
    conn.commit()


if __name__ == "__main__":
    # Command line: -f GenBank input file, -p SQLite database, -v verbosity.
    parser = argparse.ArgumentParser(description="Load genbank data into an SQLite table")
    parser.add_argument("-f", required=True, help="GenBank file to parse")
    parser.add_argument("-p", required=True, help="Phage SQLite database")
    parser.add_argument("-v", action="store_true", help="verbose output")
    args = parser.parse_args()

    # Connect, load the GenBank file, create the full text search table,
    # and finally disconnect. The verbose flag is passed to every step.
    conn = connect_to_db(args.p, args.v)
    load_genbank_file(args.f, conn, args.v)
    create_full_text_search(conn, args.v)
    disconnect(conn, args.v)
Exemple #4
0
    Convert a protein ID to a dict object of all function
    :param proteinid: The protein md5 sum
    :param clusterdb_cursor: the cursor to the cluster database
    :param verbose: more output
    :return: dict: the functions of the protein and their frequency
    """

    global protein_functions

    if proteinid not in protein_functions:
        protein_functions[proteinid] = get_functions(proteinid, clusterdb_cursor, verbose)

    return json.loads(protein_functions[proteinid][1])


if __name__ == "__main__":
    # Command line: -i protein id, -c cluster database, -v verbosity.
    parser = argparse.ArgumentParser(description=" ")
    parser.add_argument("-i", required=True, help="protein id")
    parser.add_argument("-c", required=True, help="cluster database")
    parser.add_argument("-v", action="store_true", help="verbose output")
    args = parser.parse_args()

    c = connect_to_db(args.c, args.v)
    # each lookup gets its own cursor on the same connection
    fn = proteinid_to_function(args.i, c.cursor(), args.v)
    fns = proteinid_to_all_functions(args.i, c.cursor(), args.v)
    # one "key -> value" line per entry, ordered by value, largest first
    fnstr = "\n".join(
        f"{x} -> {y!s}"
        for x, y in sorted(fns.items(), key=lambda item: item[1], reverse=True)
    )
    disconnect(c, args.v)

    print(f"The function of {args.i} is\n'{fn}'")
    print(f"All the functions are:\n{fnstr}")
Exemple #5
0
        description="Create a database and load it with GenBank data")
    parser.add_argument('-p', help='Phage SQL output database')
    parser.add_argument('-c', help='clusters SQLite database')
    parser.add_argument('-v', help='verbose output', action='store_true')
    args = parser.parse_args()

    if args.p:
        sys.stderr.write(
            f"{color.BOLD}{color.BLUE}Defining Phage Tables{color.ENDC}\n")
        if not os.path.exists(args.p):
            with open(args.p, 'w') as out:
                True
        phageconn = connect_to_db(args.p, args.v)
        define_phage_tables(phageconn, args.v)
        phageconn.commit()  # final commit to make sure everything saved!
        disconnect(phageconn, args.v)

    if args.c:
        sys.stderr.write(
            f"{color.BOLD}{color.BLUE}Defining Cluster Tables{color.ENDC}\n")
        if not os.path.exists(args.c):
            with open(args.c, 'w') as out:
                True
        clconn = connect_to_db(args.c, args.v)
        define_cluster_tables(clconn, args.v)
        clconn.commit()
        disconnect(clconn, args.v)

    if not args.p and not args.c:
        sys.stderr.write(f"{color.RED}Nothing to do!{color.ENDC}\n")
Exemple #6
0
"""
Load the cluster information into the databases
"""

import os
import sys
import argparse

from pppf_databases import connect_to_db, disconnect
from pppf_clusters import read_mmseqs_clusters, add_functions_to_clusters, insert_cluster_metadata, insert_into_database

if __name__ == '__main__':
    # Command line interface. Every short flag must be unique: argparse
    # raises "conflicting option string" if the same flag is added twice.
    parser = argparse.ArgumentParser(description='Load the cluster information into the databases')
    parser.add_argument('-p', '--phage', help='Phage SQL database', required=True)
    parser.add_argument('-c', '--clusters', help='Clusters SQL database', required=True)
    parser.add_argument('-t', '--tsv', help='Cluster tsv file', required=True)
    parser.add_argument('-n', '--name', help='Cluster name (short text)', required=True)
    parser.add_argument('-d', '--description', help='Cluster description (human readable text)', required=True)
    # '-c' is already taken by --clusters, so --cli uses '-l' (command Line)
    parser.add_argument('-l', '--cli', help='Cluster command line (bash)', required=True)
    parser.add_argument('-v', '--verbose', help='verbose output', action='store_true')
    args = parser.parse_args()

    # Open both databases, read the clusters from the tsv file, add the
    # functions to them, then store the metadata and the clusters.
    phageconn = connect_to_db(args.phage, args.verbose)
    clconn = connect_to_db(args.clusters, args.verbose)
    clusters = read_mmseqs_clusters(args.tsv, args.verbose)
    (clusters, protein_info) = add_functions_to_clusters(clusters, phageconn, args.verbose)
    # the metadata id returned here is passed on to the cluster insert
    metadata_id = insert_cluster_metadata(clconn, args.name, args.description, args.cli, args.verbose)
    insert_into_database(clusters, clconn, phageconn, metadata_id, protein_info, args.verbose)
    disconnect(phageconn, args.verbose)
    disconnect(clconn, args.verbose)