コード例 #1
0
    def test_fill_seed2kegg_mappings_table(self):
        """Fill seed2kegg_mappings from fixture data and check row counts.

        UniRef, SEED and KEGG tables are populated first. The test expects
        3 rows in seed2uniref_mappings, 2 rows in kegg2uniref_mappings and,
        once fill_seed2kegg_mappings_table has run, 4 rows in
        seed2kegg_mappings.
        """
        cur = self.cursor

        # UniRef proteins: table, FASTA import, indices.
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes and genes, followed by the SEED-to-UniRef mappings.
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(cur)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_output, 95.0, 5)
        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        seed2uniref_rows = cur.fetchone()[0]
        self.assertEqual(seed2uniref_rows, 3)

        # KEGG genomes and genes, followed by the KEGG-to-UniRef mappings.
        kegg_data_util.create_kegg_genomes_table(cur)
        kegg_genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cur, kegg_genomes_path)
        kegg_data_util.create_kegg_genes_table(cur)
        kegg_proteins_path = os.path.join(test_kegg_dir,
                                          'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cur, kegg_proteins_path)
        kegg_data_util.create_kegg2uniref_mappings_table(cur)
        kegg_data_util.load_diamond_search_results(
            cur, kegg_diamond_output, 95.0, 5)
        cur.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        kegg2uniref_rows = cur.fetchone()[0]
        self.assertEqual(kegg2uniref_rows, 2)

        # The step under test: SEED-to-KEGG mappings.
        seed_data_util.create_seed2kegg_mappings_table(cur)
        data_analysis.fill_seed2kegg_mappings_table(
            cur, seed2kegg_diamond_output, 95.0, 5)
        cur.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        seed2kegg_rows = cur.fetchone()[0]
        self.assertEqual(seed2kegg_rows, 4)
コード例 #2
0
    def test_import_seed_genomes(self):
        """Import the SEED genome list and verify one record and the total.

        Expects genome '511145.12' to carry tax_id '511145' and the
        seed_genomes table to hold 4 rows.
        """
        cur = self.cursor
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)

        # Spot-check the taxonomy ID stored for a single known genome.
        query = 'SELECT tax_id FROM seed_genomes WHERE seed_genome_id IS ?'
        cur.execute(query, ('511145.12', ))
        tax_id = cur.fetchone()[0]
        self.assertEqual(tax_id, u'511145')

        # Total number of imported genomes.
        cur.execute('SELECT COUNT(*) FROM seed_genomes')
        genome_count = cur.fetchone()[0]
        self.assertEqual(genome_count, 4)
コード例 #3
0
 def setUp(self):
     """Open the SEED database and load SEED and UniRef fixture data.

     Stores the connection and its cursor on ``self.conn`` and
     ``self.cursor``. The UniRef database file is attached under the
     name 'uniref_proteins' before the UniRef table is created and filled.
     """
     self.conn = db_utils.connect_local_database(seed_db_file)
     self.cursor = self.conn.cursor()
     cur = self.cursor

     # SEED genomes and genes.
     seed_data_util.create_seed_genomes_table(cur)
     genomes_path = os.path.join(data_dir, 'test_seed_genomes.txt')
     seed_data_util.import_seed_genomes(cur, genomes_path)
     seed_data_util.create_seed_genes_table(cur)
     seed_data_util.import_seed_genes(cur, test_seed_dir)

     # UniRef proteins, in the attached database.
     db_utils.attach_local_database(cur, uniref_db_file, 'uniref_proteins')
     uniref_data_util.create_uniref_proteins_table(cur)
     uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
     uniref_data_util.create_uniref_proteins_indices(cur)
コード例 #4
0
    def test_import_seed_gene2roles_mapping(self):
        """Import gene-to-role mappings and expect 6 seed_gene2role rows.

        Functional roles, genomes and genes are imported first; the
        mapping import is called with the comment string 'test'.
        """
        cur = self.cursor

        # Data that is loaded before the mapping import runs.
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_functional_roles_table(cur,
                                                          seed_roles_file)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)

        # The import under test.
        seed_data_util.import_seed_gene2roles_mapping(
            cur, seed_gene2roles_dir, 'test')

        cur.execute('SELECT COUNT(*) FROM seed_gene2role')
        mapping_count = cur.fetchone()[0]
        self.assertEqual(mapping_count, 6)
コード例 #5
0
    def test_find_seed2uniref_identical_mappings(self):
        """Run find_seed2uniref_identical_mappings and expect 2 mappings.

        The SEED and UniRef tables are created and filled from the fixture
        files before the function under test is called.
        """
        cur = self.cursor

        # Schema plus UniRef reference data.
        seed_data_util.create_tables(cur)
        seed_data_util.create_seed2uniref_mappings_table(cur)
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes and genes, then the mapping step under test.
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        data_analysis.find_seed2uniref_identical_mappings(cur)

        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        mapping_count = cur.fetchone()[0]
        self.assertEqual(mapping_count, 2)
コード例 #6
0
    def test_load_diamond_search_results(self):
        """Load SEED DIAMOND results and expect 3 seed2uniref_mappings rows.

        The loader is called with ``seed_diamond_file`` and the numeric
        arguments 95.0 and 5, passed through unchanged.
        """
        cur = self.cursor

        # Schema plus UniRef reference data.
        seed_data_util.create_tables(cur)
        seed_data_util.create_seed2uniref_mappings_table(cur)
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes and genes, then the loader under test.
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_file, 95.0, 5)

        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        mapping_count = cur.fetchone()[0]
        self.assertEqual(mapping_count, 3)
コード例 #7
0
    def setUp(self):
        """Connect to the test database and load the full fixture set.

        Stores the connection and cursor on ``self.conn`` and
        ``self.cursor``, then loads UniRef proteins, SEED data with its
        UniRef mappings, KEGG data with its UniRef mappings, the
        SEED-to-KEGG mappings and finally the 'nitrogen_test' collection.
        """
        self.conn = db_utils.connect_local_database(db_file)
        self.cursor = self.conn.cursor()
        cur = self.cursor

        # --- UniRef proteins ---
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # --- SEED: roles, genomes, genes, gene-to-role and UniRef mappings ---
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_functional_roles_table(cur,
                                                          seed_roles_file)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            cur, seed_gene2roles_dir, 'test')
        seed_data_util.create_seed2uniref_mappings_table(cur)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_output, 95.0, 5)

        # --- KEGG: orthologs, genomes, genes, gene-to-KO and UniRef mappings ---
        kegg_data_util.create_kegg_orthologs_table(cur)
        ko_list_path = os.path.join(test_kegg_dir, 'kegg_ko_list.txt')
        kegg_data_util.import_kegg_orthologs_list(cur, ko_list_path)
        kegg_data_util.create_kegg_genomes_table(cur)
        kegg_genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cur, kegg_genomes_path)
        kegg_data_util.create_kegg_genes_table(cur)
        kegg_proteins_path = os.path.join(test_kegg_dir,
                                          'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cur, kegg_proteins_path)
        kegg_data_util.create_kegg_genes2ko_table(cur)
        kegg_data_util.import_genes2ko_mappings(cur, test_kegg_dir)
        kegg_data_util.create_kegg2uniref_mappings_table(cur)
        kegg_data_util.load_diamond_search_results(
            cur, kegg_diamond_output, 95.0, 5)

        # --- SEED-to-KEGG mappings ---
        seed_data_util.create_seed2kegg_mappings_table(cur)
        data_analysis.fill_seed2kegg_mappings_table(
            cur, seed2kegg_diamond_output, 95.0, 5)

        # --- Function collection used by the tests ---
        db_utils.create_collections_table(cur)
        db_utils.create_collection2function_table(cur)
        data_analysis.import_collection_tsv(
            cur, collection_file, 'nitrogen_test', 'test info', '0')
コード例 #8
0
def main():
    """Recreate the SEED tables in the target database and import SEED data.

    Command-line settings come from ``get_args()``; this function reads
    ``db``, ``seed_roles_file``, ``seed_genome_file``, ``seed_prot_dir``,
    ``seed_roles_dir`` and ``comment`` from the result.

    The changes are committed only after every import step has finished.
    The connection is closed in all cases, including when a step raises,
    so a failed run does not leave the database handle open.
    """
    args = get_args()

    # Open database and prepare tables
    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()
        seed_data_util.drop_tables(c)
        seed_data_util.drop_all_indices(c)
        seed_data_util.create_tables(c)

        # Import data
        seed_data_util.import_seed_functional_roles_table(
            c, args.seed_roles_file)
        seed_data_util.import_seed_genomes(c, args.seed_genome_file)
        seed_data_util.import_seed_genes(c, args.seed_prot_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            c, args.seed_roles_dir, args.comment)

        # Save changes; reached only when all steps above succeeded
        conn.commit()
    finally:
        # Always release the connection, even after a failure
        conn.close()
コード例 #9
0
    def test_export_kegg_unmapped_proteins(self):
        """Export unmapped KEGG proteins and check the first FASTA header.

        UniRef, SEED and KEGG fixtures are loaded and the intermediate
        mapping tables are checked (3 seed2uniref, 2 kegg2uniref and
        4 seed2kegg rows). The export is then written to 'out.fasta' in
        ``data_dir`` and its first line must start with '>dml:Dmul_28240'.
        """
        cur = self.cursor
        kegg_proteins_path = os.path.join(test_kegg_dir,
                                          'ko_proteins_nr.fasta')
        out_path = os.path.join(data_dir, 'out.fasta')

        # UniRef proteins.
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes and genes, followed by the SEED-to-UniRef mappings.
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(cur)
        seed_data_util.load_diamond_search_results(cur,
                                                   seed_diamond_output, 95.0,
                                                   5)
        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        self.assertEqual(cur.fetchone()[0], 3)

        # KEGG genomes and genes, followed by the KEGG-to-UniRef mappings.
        kegg_data_util.create_kegg_genomes_table(cur)
        kegg_data_util.import_kegg_genomes_list(
            cur, os.path.join(test_kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.create_kegg_genes_table(cur)
        kegg_data_util.import_kegg_genes(cur, kegg_proteins_path)
        kegg_data_util.create_kegg2uniref_mappings_table(cur)
        kegg_data_util.load_diamond_search_results(cur,
                                                   kegg_diamond_output, 95.0,
                                                   5)
        cur.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        self.assertEqual(cur.fetchone()[0], 2)

        # SEED-to-KEGG mappings.
        seed_data_util.create_seed2kegg_mappings_table(cur)
        data_analysis.fill_seed2kegg_mappings_table(cur,
                                                    seed2kegg_diamond_output,
                                                    95.0, 5)
        cur.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        self.assertEqual(cur.fetchone()[0], 4)

        # The export under test; only the first output line is inspected.
        data_analysis.export_kegg_unmapped_proteins(
            cur, kegg_proteins_path, out_path)
        with open(out_path, 'r') as f:
            line = f.readline()
        # The former bare `f.closed` expression had no effect and was
        # dropped; the `with` block closes the file.
        self.assertEqual(line[:15], '>dml:Dmul_28240')
コード例 #10
0
 def test_import_seed_genes(self):
     """Import SEED genomes and genes, then expect 8 rows in seed_genes."""
     cur = self.cursor

     # Genomes are imported before genes, as in the other fixtures.
     seed_data_util.create_tables(cur)
     seed_data_util.import_seed_genomes(cur, seed_genome_file)
     seed_data_util.import_seed_genes(cur, seed_gene_dir)

     cur.execute('SELECT COUNT(*) FROM seed_genes')
     gene_count = cur.fetchone()[0]
     self.assertEqual(gene_count, 8)