def print_all_proteins():
    """
    Print all the proteins to stdout

    Connects to the database named by ``phagedb``, passes the connection to
    ``protein_to_fasta`` and then disconnects. The connection is released
    even if ``protein_to_fasta`` raises; the exception still propagates.

    :return: nothing
    """

    con = connect_to_db(phagedb)
    try:
        protein_to_fasta(con)
    finally:
        # always hand the connection back, otherwise an error while
        # writing the proteins would leave it open
        disconnect(con)
def list_all_genomes():
    """
    Print the description of every genome to stdout, one per line

    Connects to the database named by ``phagedb``, selects the
    ``description`` column of the ``genome`` table and prints each value.
    The connection is released even if the query or the printing fails;
    the exception still propagates.

    :return: nothing
    """

    con = connect_to_db(phagedb)
    try:
        exc = con.cursor().execute("select description from genome")
        for d in exc.fetchall():
            # each row is a one-element tuple holding the description
            print(f"{d[0]}")
    finally:
        # always hand the connection back, even when the query fails
        disconnect(con)
Create a full text search virtual table on the protein products :param conn: the database connection :param verbose: more output :return: """ if verbose: sys.stderr.write( f"{color.GREEN}Adding full text search capabilities{color.ENDC}\n") c = conn.cursor() c.execute( "CREATE VIRTUAL TABLE protein_fts using FTS5(protein_rowid, product);") c.execute( "INSERT INTO protein_fts SELECT protein_rowid, product FROM protein;") conn.commit() if __name__ == '__main__': parser = argparse.ArgumentParser( description='Load genbank data into an SQLite table') parser.add_argument('-f', help='GenBank file to parse', required=True) parser.add_argument('-p', help='Phage SQLite database', required=True) parser.add_argument('-v', help='verbose output', action='store_true') args = parser.parse_args() conn = connect_to_db(args.p, args.v) load_genbank_file(args.f, conn, args.v) create_full_text_search(conn, args.v) disconnect(conn, args.v)
Convert a protein ID to a dict object of all function :param proteinid: The protein md5 sum :param clusterdb_cursor: the cursor to the cluster database :param verbose: more output :return: dict: the functions of the protein and their frequency """ global protein_functions if proteinid not in protein_functions: protein_functions[proteinid] = get_functions(proteinid, clusterdb_cursor, verbose) return json.loads(protein_functions[proteinid][1]) if __name__ == '__main__': parser = argparse.ArgumentParser(description=" ") parser.add_argument('-i', help='protein id', required=True) parser.add_argument('-c', help='cluster database', required=True) parser.add_argument('-v', help='verbose output', action='store_true') args = parser.parse_args() c= connect_to_db(args.c, args.v) fn = proteinid_to_function(args.i, c.cursor(), args.v) fns = proteinid_to_all_functions(args.i, c.cursor(), args.v) fnstr = "\n".join([f"{x} -> {str(y)}" for x,y in sorted(fns.items(), key=lambda item: item[1], reverse=True)]) disconnect(c, args.v) print(f"The function of {args.i} is\n'{fn}'") print(f'All the functions are:\n{fnstr}')
description="Create a database and load it with GenBank data") parser.add_argument('-p', help='Phage SQL output database') parser.add_argument('-c', help='clusters SQLite database') parser.add_argument('-v', help='verbose output', action='store_true') args = parser.parse_args() if args.p: sys.stderr.write( f"{color.BOLD}{color.BLUE}Defining Phage Tables{color.ENDC}\n") if not os.path.exists(args.p): with open(args.p, 'w') as out: True phageconn = connect_to_db(args.p, args.v) define_phage_tables(phageconn, args.v) phageconn.commit() # final commit to make sure everything saved! disconnect(phageconn, args.v) if args.c: sys.stderr.write( f"{color.BOLD}{color.BLUE}Defining Cluster Tables{color.ENDC}\n") if not os.path.exists(args.c): with open(args.c, 'w') as out: True clconn = connect_to_db(args.c, args.v) define_cluster_tables(clconn, args.v) clconn.commit() disconnect(clconn, args.v) if not args.p and not args.c: sys.stderr.write(f"{color.RED}Nothing to do!{color.ENDC}\n")
""" """ import os import sys import argparse from pppf_databases import connect_to_db, disconnect from pppf_clusters import read_mmseqs_clusters, add_functions_to_clusters, insert_cluster_metadata, insert_into_database if __name__ == '__main__': parser = argparse.ArgumentParser(description='Load the cluster information into the databases') parser.add_argument('-p', '--phage', help='Phage SQL database', required=True) parser.add_argument('-c', '--clusters', help='Clusters SQL database', required=True) parser.add_argument('-t', '--tsv', help='Cluster tsv file', required=True) parser.add_argument('-n', '--name', help='Cluster name (short text)', required=True) parser.add_argument('-d', '--description', help='Cluster description (human readable text)', required=True) parser.add_argument('-c', '--cli', help='Cluster command line (bash)', required=True) parser.add_argument('-v', '--verbose', help='verbose output', action='store_true') args = parser.parse_args() phageconn = connect_to_db(args.phage, args.verbose) clconn = connect_to_db(args.clusters, args.verbose) clusters = read_mmseqs_clusters(args.tsv, args.verbose) (clusters, protein_info) = add_functions_to_clusters(clusters, phageconn, args.verbose) metadata_id = insert_cluster_metadata(clconn, args.name, args.description, args.cli, args.verbose) insert_into_database(clusters, clconn, phageconn, metadata_id, protein_info, args.verbose) disconnect(phageconn, args.verbose) disconnect(clconn, args.verbose)