def test_fill_seed2kegg_mappings_table(self):
    """Check row counts after filling the seed2kegg mappings table.

    Loads the UniRef, SEED and KEGG fixtures through their data-util
    modules, loads both DIAMOND result files, then calls
    ``data_analysis.fill_seed2kegg_mappings_table`` and verifies the
    number of rows in each of the three mapping tables.
    """
    cur = self.cursor

    def first_value(query):
        # Run a single-value query and return the first column of the
        # first row.
        cur.execute(query)
        return cur.fetchone()[0]

    # UniRef proteins.
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)

    # SEED genomes/genes and SEED -> UniRef mappings.
    seed_data_util.create_tables(cur)
    seed_data_util.import_seed_genomes(cur, seed_genome_file)
    seed_data_util.import_seed_genes(cur, seed_gene_dir)
    seed_data_util.create_seed2uniref_mappings_table(cur)
    seed_data_util.load_diamond_search_results(
        cur, seed_diamond_output, 95.0, 5)
    self.assertEqual(
        first_value('SELECT COUNT(*) FROM seed2uniref_mappings'), 3)

    # KEGG genomes/genes and KEGG -> UniRef mappings.
    kegg_data_util.create_kegg_genomes_table(cur)
    genomes_list = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
    kegg_data_util.import_kegg_genomes_list(cur, genomes_list)
    kegg_data_util.create_kegg_genes_table(cur)
    proteins_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
    kegg_data_util.import_kegg_genes(cur, proteins_fasta)
    kegg_data_util.create_kegg2uniref_mappings_table(cur)
    kegg_data_util.load_diamond_search_results(
        cur, kegg_diamond_output, 95.0, 5)
    self.assertEqual(
        first_value('SELECT COUNT(*) FROM kegg2uniref_mappings'), 2)

    # SEED -> KEGG mappings (the function under test).
    seed_data_util.create_seed2kegg_mappings_table(cur)
    data_analysis.fill_seed2kegg_mappings_table(
        cur, seed2kegg_diamond_output, 95.0, 5)
    self.assertEqual(
        first_value('SELECT COUNT(*) FROM seed2kegg_mappings'), 4)
def test_uniref_import(self):
    """Import the UniRef FASTA fixture and verify one hash and the row count."""
    cur = self.cursor

    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)

    # Look up the stored hash for one known UniRef identifier.
    cur.execute(
        'SELECT protein_hash FROM uniref_proteins WHERE uniref_id = ?',
        ('UniRef100_Q92AT0',))
    hash_row = cur.fetchone()
    self.assertEqual(hash_row[0], u'B2E2EDF5A1AA957ADBAA08384F6BFB9D')

    # Total number of rows in the table after the import.
    cur.execute('SELECT COUNT(*) FROM uniref_proteins')
    count_row = cur.fetchone()
    self.assertEqual(count_row[0], 7)
def setUp(self):
    """Open the SEED test database and populate SEED and UniRef fixtures.

    Stores the connection in ``self.conn`` and its cursor in
    ``self.cursor`` for use by the test methods.
    """
    self.conn = db_utils.connect_local_database(seed_db_file)
    cur = self.cursor = self.conn.cursor()

    # SEED genomes and genes.
    seed_data_util.create_seed_genomes_table(cur)
    genomes_file = os.path.join(data_dir, 'test_seed_genomes.txt')
    seed_data_util.import_seed_genomes(cur, genomes_file)
    seed_data_util.create_seed_genes_table(cur)
    seed_data_util.import_seed_genes(cur, test_seed_dir)

    # Attach the UniRef database file; the third argument is presumably
    # the schema alias used for it -- confirm against db_utils.
    db_utils.attach_local_database(cur, uniref_db_file, 'uniref_proteins')

    # UniRef proteins.
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)
def test_find_seed2uniref_identical_mappings(self):
    """Run ``find_seed2uniref_identical_mappings`` and check the row count.

    Creates the SEED and UniRef tables, imports the fixtures, calls the
    function under test, and expects two rows in ``seed2uniref_mappings``.
    """
    cur = self.cursor

    # Schema for SEED data and the SEED -> UniRef mapping table.
    seed_data_util.create_tables(cur)
    seed_data_util.create_seed2uniref_mappings_table(cur)

    # UniRef proteins.
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)

    # SEED genomes and genes.
    seed_data_util.import_seed_genomes(cur, seed_genome_file)
    seed_data_util.import_seed_genes(cur, seed_gene_dir)

    data_analysis.find_seed2uniref_identical_mappings(cur)

    cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
    count_row = cur.fetchone()
    self.assertEqual(count_row[0], 2)
def test_load_diamond_search_results(self):
    """Load the SEED DIAMOND results file and check the mapping row count.

    Creates the SEED and UniRef tables, imports the fixtures, calls
    ``seed_data_util.load_diamond_search_results`` on
    ``seed_diamond_file``, and expects three rows in
    ``seed2uniref_mappings``.
    """
    cur = self.cursor

    # Schema for SEED data and the SEED -> UniRef mapping table.
    seed_data_util.create_tables(cur)
    seed_data_util.create_seed2uniref_mappings_table(cur)

    # UniRef proteins.
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)

    # SEED genomes and genes.
    seed_data_util.import_seed_genomes(cur, seed_genome_file)
    seed_data_util.import_seed_genes(cur, seed_gene_dir)

    seed_data_util.load_diamond_search_results(
        cur, seed_diamond_file, 95.0, 5)

    cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
    count_row = cur.fetchone()
    self.assertEqual(count_row[0], 3)
def main():
    """Rebuild the UniRef proteins table in the database named by ``args.db``.

    Reads ``args.db`` and ``args.fasta`` from ``get_args()``, then calls
    the ``uniref_data_util`` helpers in order: ``drop_tables``,
    ``drop_indices``, ``create_uniref_proteins_table``,
    ``import_uniref_fasta`` (with ``args.fasta``) and
    ``create_uniref_proteins_indices``. The connection is committed once
    all of these have succeeded.

    The connection is always closed, including when one of the steps
    raises; in that case this function does not commit, and the
    exception propagates to the caller.
    """
    args = get_args()
    conn = db_utils.connect_local_database(args.db)
    try:
        cursor = conn.cursor()
        uniref_data_util.drop_tables(cursor)
        uniref_data_util.drop_indices(cursor)
        uniref_data_util.create_uniref_proteins_table(cursor)
        uniref_data_util.import_uniref_fasta(cursor, args.fasta)
        uniref_data_util.create_uniref_proteins_indices(cursor)
        conn.commit()
    finally:
        # Release the database handle even if an import step failed.
        conn.close()
def setUp(self):
    """Open the KEGG test database and populate KEGG and UniRef fixtures.

    Stores the connection in ``self.conn`` and its cursor in
    ``self.cursor`` for use by the test methods.
    """
    kegg_db_path = os.path.join(data_dir, kegg_db_file)
    self.conn = db_utils.connect_local_database(kegg_db_path)
    cur = self.cursor = self.conn.cursor()

    # Attach the UniRef database file; the third argument is presumably
    # the schema alias used for it -- confirm against db_utils.
    uniref_db_path = os.path.join(data_dir, uniref_db_file)
    db_utils.attach_local_database(cur, uniref_db_path, 'uniref_proteins')

    # KEGG genomes and genes.
    kegg_data_util.create_kegg_genomes_table(cur)
    genomes_list = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
    kegg_data_util.import_kegg_genomes_list(cur, genomes_list)
    kegg_data_util.create_kegg_genes_table(cur)
    proteins_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
    kegg_data_util.import_kegg_genes(cur, proteins_fasta)

    # UniRef proteins.
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)
def setUp(self):
    """Build the complete test database: UniRef, SEED, KEGG and a collection.

    Stores the connection in ``self.conn`` and its cursor in
    ``self.cursor``. The steps run in a fixed order: UniRef proteins,
    SEED tables and imports, SEED DIAMOND results, KEGG tables and
    imports, KEGG DIAMOND results, the seed2kegg mapping table, and
    finally the collection tables plus one imported collection TSV.
    """
    self.conn = db_utils.connect_local_database(db_file)
    cur = self.cursor = self.conn.cursor()

    # --- UniRef proteins ---
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)

    # --- SEED roles, genomes, genes and gene-to-role links ---
    seed_data_util.create_tables(cur)
    seed_data_util.import_seed_functional_roles_table(cur, seed_roles_file)
    seed_data_util.import_seed_genomes(cur, seed_genome_file)
    seed_data_util.import_seed_genes(cur, seed_gene_dir)
    seed_data_util.import_seed_gene2roles_mapping(
        cur, seed_gene2roles_dir, 'test')

    # --- SEED -> UniRef mappings from DIAMOND output ---
    seed_data_util.create_seed2uniref_mappings_table(cur)
    seed_data_util.load_diamond_search_results(
        cur, seed_diamond_output, 95.0, 5)

    # --- KEGG orthologs, genomes, genes and gene-to-KO links ---
    kegg_data_util.create_kegg_orthologs_table(cur)
    ko_list = os.path.join(test_kegg_dir, 'kegg_ko_list.txt')
    kegg_data_util.import_kegg_orthologs_list(cur, ko_list)
    kegg_data_util.create_kegg_genomes_table(cur)
    genomes_list = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
    kegg_data_util.import_kegg_genomes_list(cur, genomes_list)
    kegg_data_util.create_kegg_genes_table(cur)
    proteins_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
    kegg_data_util.import_kegg_genes(cur, proteins_fasta)
    kegg_data_util.create_kegg_genes2ko_table(cur)
    kegg_data_util.import_genes2ko_mappings(cur, test_kegg_dir)

    # --- KEGG -> UniRef mappings from DIAMOND output ---
    kegg_data_util.create_kegg2uniref_mappings_table(cur)
    kegg_data_util.load_diamond_search_results(
        cur, kegg_diamond_output, 95.0, 5)

    # --- SEED -> KEGG mappings ---
    seed_data_util.create_seed2kegg_mappings_table(cur)
    data_analysis.fill_seed2kegg_mappings_table(
        cur, seed2kegg_diamond_output, 95.0, 5)

    # --- Collections ---
    db_utils.create_collections_table(cur)
    db_utils.create_collection2function_table(cur)
    data_analysis.import_collection_tsv(
        cur, collection_file, 'nitrogen_test', 'test info', '0')
def test_export_kegg_unmapped_proteins(self):
    """Export unmapped KEGG proteins and check the first FASTA header.

    Loads the UniRef, SEED and KEGG fixtures, loads both DIAMOND result
    files, fills the seed2kegg mapping table (verifying row counts along
    the way), then calls ``data_analysis.export_kegg_unmapped_proteins``
    to write ``out.fasta`` under ``data_dir``. The first 15 characters of
    the first line of that file must equal ``>dml:Dmul_28240``.
    """
    cur = self.cursor

    def first_value(query):
        # Run a single-value query and return the first column of the
        # first row.
        cur.execute(query)
        return cur.fetchone()[0]

    # UniRef proteins.
    uniref_data_util.create_uniref_proteins_table(cur)
    uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cur)

    # SEED genomes/genes and SEED -> UniRef mappings.
    seed_data_util.create_tables(cur)
    seed_data_util.import_seed_genomes(cur, seed_genome_file)
    seed_data_util.import_seed_genes(cur, seed_gene_dir)
    seed_data_util.create_seed2uniref_mappings_table(cur)
    seed_data_util.load_diamond_search_results(
        cur, seed_diamond_output, 95.0, 5)
    self.assertEqual(
        first_value('SELECT COUNT(*) FROM seed2uniref_mappings'), 3)

    # KEGG genomes/genes and KEGG -> UniRef mappings.
    kegg_data_util.create_kegg_genomes_table(cur)
    kegg_data_util.import_kegg_genomes_list(
        cur, os.path.join(test_kegg_dir, 'kegg_genomes.txt'))
    kegg_data_util.create_kegg_genes_table(cur)
    proteins_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
    kegg_data_util.import_kegg_genes(cur, proteins_fasta)
    kegg_data_util.create_kegg2uniref_mappings_table(cur)
    kegg_data_util.load_diamond_search_results(
        cur, kegg_diamond_output, 95.0, 5)
    self.assertEqual(
        first_value('SELECT COUNT(*) FROM kegg2uniref_mappings'), 2)

    # SEED -> KEGG mappings.
    seed_data_util.create_seed2kegg_mappings_table(cur)
    data_analysis.fill_seed2kegg_mappings_table(
        cur, seed2kegg_diamond_output, 95.0, 5)
    self.assertEqual(
        first_value('SELECT COUNT(*) FROM seed2kegg_mappings'), 4)

    # Export and inspect the first line of the written FASTA file.
    out_fasta = os.path.join(data_dir, 'out.fasta')
    data_analysis.export_kegg_unmapped_proteins(
        cur, proteins_fasta, out_fasta)
    with open(out_fasta, 'r') as f:
        first_line = f.readline()
    # The original ended with a bare `f.closed` expression, which had no
    # effect; the `with` block already closes the file.
    self.assertEqual(first_line[:15], '>dml:Dmul_28240')