Ejemplo n.º 1
0
    def test_create_tables(self):
        """Verify create_tables() registers each SEED table and leaves it empty."""
        cur = self.cursor
        seed_data_util.create_tables(cur)

        expected_tables = (
            'seed_functional_roles',
            'seed_genomes',
            'seed_genes',
            'seed_gene2role',
        )
        for table_name in expected_tables:
            # Exactly one table of this name must be listed in sqlite_master.
            cur.execute(
                'select count(*) from sqlite_master where type = ? and name = ?',
                ('table', table_name))
            self.assertEqual(cur.fetchone()[0], 1)
            # Nothing has been imported yet, so the table must hold no rows.
            cur.execute('SELECT COUNT(*) FROM ' + table_name)
            self.assertEqual(cur.fetchone()[0], 0)
Ejemplo n.º 2
0
    def test_import_collection_tsv(self):
        """Import a collection TSV file and check the rows it produces.

        The SEED functional roles and the KEGG orthologs list are loaded
        first, then ``collection_file`` is imported under the name
        'nitrogen_test'. The test checks the row counts of the
        ``collections`` and ``collection2function`` tables and the name
        stored for function_uid 2.
        """
        # Load test data
        seed_data_util.create_tables(self.cursor)
        seed_data_util.import_seed_functional_roles_table(
            self.cursor, seed_roles_file)
        self.cursor.execute(
            'SELECT uid FROM seed_functional_roles WHERE seed_role_id IS ?',
            ('9724', ))
        # The result of this lookup used to be discarded; assert on it so a
        # failed role import is reported here rather than further down.
        self.assertIsNotNone(self.cursor.fetchone())

        kegg_data_util.create_kegg_orthologs_table(self.cursor)
        kegg_data_util.import_kegg_orthologs_list(
            self.cursor, os.path.join(test_kegg_dir, 'kegg_ko_list.txt'))
        self.cursor.execute('SELECT COUNT(*) FROM kegg_orthologs')
        self.assertEqual(self.cursor.fetchone()[0], 1)

        # Prepare database
        db_utils.create_collections_table(self.cursor)
        db_utils.create_collection2function_table(self.cursor)

        # Import data
        data_analysis.import_collection_tsv(self.cursor, collection_file,
                                            'nitrogen_test', 'test info', '0')

        # Check results
        self.cursor.execute('SELECT COUNT(*) FROM collections')
        self.assertEqual(self.cursor.fetchone()[0], 1)
        self.cursor.execute('SELECT COUNT(*) FROM collection2function')
        self.assertEqual(self.cursor.fetchone()[0], 4)
        self.cursor.execute(
            'SELECT name FROM collection2function WHERE function_uid=?', (2, ))
        row = self.cursor.fetchone()
        # Check the row before indexing it: a missing row should be a test
        # failure, not a TypeError raised by None[0].
        self.assertIsNotNone(row)
        self.assertEqual(row[0], 'Test_name2')
Ejemplo n.º 3
0
    def test_fill_seed2kegg_mappings_table(self):
        """Check the row counts of the three mapping tables.

        Builds the UniRef, SEED and KEGG tables, loads both sets of search
        results, then calls fill_seed2kegg_mappings_table() and expects
        4 rows in ``seed2kegg_mappings``.
        """
        cur = self.cursor

        def row_count(table):
            # Number of rows currently stored in the given table.
            cur.execute('SELECT COUNT(*) FROM ' + table)
            return cur.fetchone()[0]

        # UniRef proteins
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes, genes and SEED-to-UniRef mappings
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(cur)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_output, 95.0, 5)
        self.assertEqual(row_count('seed2uniref_mappings'), 3)

        # KEGG genomes, genes and KEGG-to-UniRef mappings
        kegg_data_util.create_kegg_genomes_table(cur)
        genomes_list = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cur, genomes_list)
        kegg_data_util.create_kegg_genes_table(cur)
        proteins_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cur, proteins_fasta)
        kegg_data_util.create_kegg2uniref_mappings_table(cur)
        kegg_data_util.load_diamond_search_results(
            cur, kegg_diamond_output, 95.0, 5)
        self.assertEqual(row_count('kegg2uniref_mappings'), 2)

        # SEED-to-KEGG mappings: the table under test
        seed_data_util.create_seed2kegg_mappings_table(cur)
        data_analysis.fill_seed2kegg_mappings_table(
            cur, seed2kegg_diamond_output, 95.0, 5)
        self.assertEqual(row_count('seed2kegg_mappings'), 4)
Ejemplo n.º 4
0
    def test_import_seed_genomes(self):
        """Import the SEED genome list and check one tax_id and the row count."""
        cur = self.cursor
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)

        # Genome 511145.12 must be stored with tax_id 511145.
        cur.execute(
            'SELECT tax_id FROM seed_genomes WHERE seed_genome_id IS ?',
            ('511145.12', ))
        tax_id = cur.fetchone()[0]
        self.assertEqual(tax_id, u'511145')

        # The test genome file is expected to yield four rows.
        cur.execute('SELECT COUNT(*) FROM seed_genomes')
        genome_count = cur.fetchone()[0]
        self.assertEqual(genome_count, 4)
Ejemplo n.º 5
0
    def test_import_seed_gene2roles_mapping(self):
        """Import gene-to-role mappings and expect 6 rows in seed_gene2role."""
        cur = self.cursor

        # Roles, genomes and genes are imported before the mapping itself.
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_functional_roles_table(cur, seed_roles_file)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            cur, seed_gene2roles_dir, 'test')

        cur.execute('SELECT COUNT(*) FROM seed_gene2role')
        mapping_count = cur.fetchone()[0]
        self.assertEqual(mapping_count, 6)
Ejemplo n.º 6
0
    def test_find_seed2uniref_identical_mappings(self):
        """Run find_seed2uniref_identical_mappings() and expect 2 mapping rows."""
        cur = self.cursor

        # Empty SEED tables and the SEED-to-UniRef mapping table
        seed_data_util.create_tables(cur)
        seed_data_util.create_seed2uniref_mappings_table(cur)

        # UniRef proteins
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes and genes, then the function under test
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        data_analysis.find_seed2uniref_identical_mappings(cur)

        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        mapping_count = cur.fetchone()[0]
        self.assertEqual(mapping_count, 2)
Ejemplo n.º 7
0
    def test_load_diamond_search_results(self):
        """Load search results from seed_diamond_file and expect 3 mapping rows."""
        cur = self.cursor

        # Empty SEED tables and the SEED-to-UniRef mapping table
        seed_data_util.create_tables(cur)
        seed_data_util.create_seed2uniref_mappings_table(cur)

        # UniRef proteins
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes and genes, then the function under test
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_file, 95.0, 5)

        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        mapping_count = cur.fetchone()[0]
        self.assertEqual(mapping_count, 3)
Ejemplo n.º 8
0
    def test_import_seed_functional_roles_table(self):
        """Import the SEED roles file and check role '9724' and the row total."""
        cur = self.cursor
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_functional_roles_table(cur, seed_roles_file)

        role_key = ('9724', )

        # Role 9724 is expected to be stored under uid 6 ...
        cur.execute(
            'SELECT uid FROM seed_functional_roles WHERE seed_role_id IS ?',
            role_key)
        role_uid = cur.fetchone()[0]
        self.assertEqual(role_uid, 6)

        # ... and to appear exactly once.
        cur.execute(
            'SELECT COUNT(*) FROM seed_functional_roles WHERE seed_role_id IS ?',
            role_key)
        occurrences = cur.fetchone()[0]
        self.assertEqual(occurrences, 1)

        # The whole table is expected to hold six roles.
        cur.execute('SELECT COUNT(*) FROM seed_functional_roles')
        total_roles = cur.fetchone()[0]
        self.assertEqual(total_roles, 6)
Ejemplo n.º 9
0
    def setUp(self):
        """Open the test database and populate every table the tests use.

        Stores the connection in ``self.conn`` and a cursor in
        ``self.cursor``, then fills the UniRef, SEED, KEGG, SEED-to-KEGG
        mapping and collection tables from the module-level test files.
        """
        self.conn = db_utils.connect_local_database(db_file)
        # unittest skips tearDown() when setUp() raises, so register the
        # close as a cleanup: the connection is then released even if one of
        # the import steps below fails.
        # NOTE(review): assumes close() may safely be called a second time if
        # tearDown() also closes the connection (true for sqlite3) - confirm.
        self.addCleanup(self.conn.close)
        self.cursor = self.conn.cursor()

        # UniRef proteins
        uniref_data_util.create_uniref_proteins_table(self.cursor)
        uniref_data_util.import_uniref_fasta(self.cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(self.cursor)

        # SEED roles, genomes, genes, gene-to-role and UniRef mappings
        seed_data_util.create_tables(self.cursor)
        seed_data_util.import_seed_functional_roles_table(
            self.cursor, seed_roles_file)
        seed_data_util.import_seed_genomes(self.cursor, seed_genome_file)
        seed_data_util.import_seed_genes(self.cursor, seed_gene_dir)
        seed_data_util.import_seed_gene2roles_mapping(self.cursor,
                                                      seed_gene2roles_dir,
                                                      'test')
        seed_data_util.create_seed2uniref_mappings_table(self.cursor)
        seed_data_util.load_diamond_search_results(self.cursor,
                                                   seed_diamond_output, 95.0,
                                                   5)

        # KEGG orthologs, genomes, genes, gene-to-KO and UniRef mappings
        kegg_data_util.create_kegg_orthologs_table(self.cursor)
        kegg_data_util.import_kegg_orthologs_list(
            self.cursor, os.path.join(test_kegg_dir, 'kegg_ko_list.txt'))
        kegg_data_util.create_kegg_genomes_table(self.cursor)
        kegg_data_util.import_kegg_genomes_list(
            self.cursor, os.path.join(test_kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.create_kegg_genes_table(self.cursor)
        kegg_data_util.import_kegg_genes(
            self.cursor, os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta'))
        kegg_data_util.create_kegg_genes2ko_table(self.cursor)
        kegg_data_util.import_genes2ko_mappings(self.cursor, test_kegg_dir)
        kegg_data_util.create_kegg2uniref_mappings_table(self.cursor)
        kegg_data_util.load_diamond_search_results(self.cursor,
                                                   kegg_diamond_output, 95.0,
                                                   5)

        # SEED-to-KEGG mappings
        seed_data_util.create_seed2kegg_mappings_table(self.cursor)
        data_analysis.fill_seed2kegg_mappings_table(self.cursor,
                                                    seed2kegg_diamond_output,
                                                    95.0, 5)

        # Collections
        db_utils.create_collections_table(self.cursor)
        db_utils.create_collection2function_table(self.cursor)
        data_analysis.import_collection_tsv(self.cursor, collection_file,
                                            'nitrogen_test', 'test info', '0')
Ejemplo n.º 10
0
def main():
    """Rebuild the SEED tables of a local database from the input files.

    Reads the options returned by get_args(), drops the existing SEED
    tables and indices, recreates the tables, imports the functional roles,
    genomes, genes and gene-to-role mappings, and commits the result.
    The connection is closed even when one of the steps raises.
    """
    args = get_args()

    # Open database and prepare tables
    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()
        seed_data_util.drop_tables(c)
        seed_data_util.drop_all_indices(c)
        seed_data_util.create_tables(c)

        # Import data
        seed_data_util.import_seed_functional_roles_table(
            c, args.seed_roles_file)
        seed_data_util.import_seed_genomes(c, args.seed_genome_file)
        seed_data_util.import_seed_genes(c, args.seed_prot_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            c, args.seed_roles_dir, args.comment)

        # Save changes only after every import step has succeeded
        conn.commit()
    finally:
        # Release the database handle on both the success and error paths
        conn.close()
    def test_export_kegg_unmapped_proteins(self):
        """Export unmapped KEGG proteins and check the first output line.

        Builds the UniRef, SEED and KEGG tables and the three mapping
        tables, checking their row counts on the way, then calls
        export_kegg_unmapped_proteins() and expects the first line of
        ``out.fasta`` in ``data_dir`` to start with '>dml:Dmul_28240'.
        """
        # Both paths are needed twice below, so build each one once.
        kegg_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
        out_fasta = os.path.join(data_dir, 'out.fasta')

        uniref_data_util.create_uniref_proteins_table(self.cursor)
        uniref_data_util.import_uniref_fasta(self.cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(self.cursor)
        seed_data_util.create_tables(self.cursor)
        seed_data_util.import_seed_genomes(self.cursor, seed_genome_file)
        seed_data_util.import_seed_genes(self.cursor, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(self.cursor)
        seed_data_util.load_diamond_search_results(self.cursor,
                                                   seed_diamond_output, 95.0,
                                                   5)
        self.cursor.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 3)
        kegg_data_util.create_kegg_genomes_table(self.cursor)
        kegg_data_util.import_kegg_genomes_list(
            self.cursor, os.path.join(test_kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.create_kegg_genes_table(self.cursor)
        kegg_data_util.import_kegg_genes(self.cursor, kegg_fasta)
        kegg_data_util.create_kegg2uniref_mappings_table(self.cursor)
        kegg_data_util.load_diamond_search_results(self.cursor,
                                                   kegg_diamond_output, 95.0,
                                                   5)
        self.cursor.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 2)

        seed_data_util.create_seed2kegg_mappings_table(self.cursor)
        data_analysis.fill_seed2kegg_mappings_table(self.cursor,
                                                    seed2kegg_diamond_output,
                                                    95.0, 5)
        self.cursor.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 4)

        data_analysis.export_kegg_unmapped_proteins(
            self.cursor, kegg_fasta, out_fasta)
        # The with-block closes the file; the stray no-op `f.closed`
        # expression that used to follow the assertion has been removed.
        with open(out_fasta, 'r') as f:
            line = f.readline()
            self.assertEqual(line[:15], '>dml:Dmul_28240')
Ejemplo n.º 12
0
 def test_get_role_uid(self):
     """Check that get_role_uid() returns the expected uid for two roles."""
     cur = self.cursor
     seed_data_util.create_tables(cur)
     seed_data_util.import_seed_functional_roles_table(cur, seed_roles_file)

     # (SEED role id, uid expected from the test roles file)
     expectations = (('105', 1), ('2800', 3))
     for role_id, expected_uid in expectations:
         found_uid = seed_data_util.get_role_uid(cur, role_id)
         self.assertEqual(found_uid, expected_uid)
Ejemplo n.º 13
0
 def test_import_seed_genes(self):
     """Import SEED genomes and genes and expect 8 rows in seed_genes."""
     cur = self.cursor

     # Genomes are imported before genes, as in the other SEED tests.
     seed_data_util.create_tables(cur)
     seed_data_util.import_seed_genomes(cur, seed_genome_file)
     seed_data_util.import_seed_genes(cur, seed_gene_dir)

     cur.execute('SELECT COUNT(*) FROM seed_genes')
     gene_count = cur.fetchone()[0]
     self.assertEqual(gene_count, 8)