Esempio n. 1
0
    def test_fill_seed2kegg_mappings_table(self):
        """Fill seed2kegg_mappings from DIAMOND output and check its row count.

        The UniRef, SEED and KEGG tables are created and loaded first; the
        seed2uniref_mappings and kegg2uniref_mappings row counts are checked
        along the way before the final seed2kegg_mappings count.
        """
        cur = self.cursor

        # UniRef proteins: table, FASTA import, indices.
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # SEED genomes/genes and their UniRef mappings.
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(cur)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_output, 95.0, 5)
        cur.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        seed2uniref_count = cur.fetchone()[0]
        self.assertEqual(seed2uniref_count, 3)

        # KEGG genomes/genes and their UniRef mappings.
        kegg_data_util.create_kegg_genomes_table(cur)
        genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cur, genomes_path)
        kegg_data_util.create_kegg_genes_table(cur)
        proteins_path = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cur, proteins_path)
        kegg_data_util.create_kegg2uniref_mappings_table(cur)
        kegg_data_util.load_diamond_search_results(
            cur, kegg_diamond_output, 95.0, 5)
        cur.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        kegg2uniref_count = cur.fetchone()[0]
        self.assertEqual(kegg2uniref_count, 2)

        # The table under test.
        seed_data_util.create_seed2kegg_mappings_table(cur)
        data_analysis.fill_seed2kegg_mappings_table(
            cur, seed2kegg_diamond_output, 95.0, 5)
        cur.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        seed2kegg_count = cur.fetchone()[0]
        self.assertEqual(seed2kegg_count, 4)
Esempio n. 2
0
 def test_import_kegg_genes(self):
     """Import the test KEGG genes FASTA and expect 3 rows in kegg_genes."""
     cur = self.cursor

     # The genomes table is created and loaded before the genes table.
     genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
     kegg_data_util.create_kegg_genomes_table(cur)
     kegg_data_util.import_kegg_genomes_list(cur, genomes_path)

     proteins_path = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
     kegg_data_util.create_kegg_genes_table(cur)
     kegg_data_util.import_kegg_genes(cur, proteins_path)

     cur.execute('SELECT COUNT(*) FROM kegg_genes')
     gene_count = cur.fetchone()[0]
     self.assertEqual(gene_count, 3)
 def setUp(self):
     """Open the KEGG test database, attach UniRef, and load both fixtures.

     Stores the connection on ``self.conn`` and its cursor on
     ``self.cursor`` for use by the test methods.
     """
     kegg_db_path = os.path.join(data_dir, kegg_db_file)
     self.conn = db_utils.connect_local_database(kegg_db_path)
     cur = self.conn.cursor()
     self.cursor = cur

     uniref_db_path = os.path.join(data_dir, uniref_db_file)
     db_utils.attach_local_database(cur, uniref_db_path, 'uniref_proteins')

     # KEGG genomes and genes.
     kegg_data_util.create_kegg_genomes_table(cur)
     genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
     kegg_data_util.import_kegg_genomes_list(cur, genomes_path)
     kegg_data_util.create_kegg_genes_table(cur)
     proteins_path = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
     kegg_data_util.import_kegg_genes(cur, proteins_path)

     # UniRef proteins: table, FASTA import, indices.
     uniref_data_util.create_uniref_proteins_table(cur)
     uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
     uniref_data_util.create_uniref_proteins_indices(cur)
    def setUp(self):
        """Build the complete test database used by the test methods.

        Connects to ``db_file``, then loads UniRef, SEED and KEGG data, the
        seed2kegg mappings, and finally one collection imported from
        ``collection_file``. The connection and cursor are kept on
        ``self.conn`` and ``self.cursor``.
        """
        self.conn = db_utils.connect_local_database(db_file)
        cur = self.conn.cursor()
        self.cursor = cur

        # --- UniRef ---
        uniref_data_util.create_uniref_proteins_table(cur)
        uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(cur)

        # --- SEED ---
        seed_data_util.create_tables(cur)
        seed_data_util.import_seed_functional_roles_table(cur, seed_roles_file)
        seed_data_util.import_seed_genomes(cur, seed_genome_file)
        seed_data_util.import_seed_genes(cur, seed_gene_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            cur, seed_gene2roles_dir, 'test')
        seed_data_util.create_seed2uniref_mappings_table(cur)
        seed_data_util.load_diamond_search_results(
            cur, seed_diamond_output, 95.0, 5)

        # --- KEGG ---
        kegg_data_util.create_kegg_orthologs_table(cur)
        ko_list_path = os.path.join(test_kegg_dir, 'kegg_ko_list.txt')
        kegg_data_util.import_kegg_orthologs_list(cur, ko_list_path)
        kegg_data_util.create_kegg_genomes_table(cur)
        genomes_path = os.path.join(test_kegg_dir, 'kegg_genomes.txt')
        kegg_data_util.import_kegg_genomes_list(cur, genomes_path)
        kegg_data_util.create_kegg_genes_table(cur)
        proteins_path = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
        kegg_data_util.import_kegg_genes(cur, proteins_path)
        kegg_data_util.create_kegg_genes2ko_table(cur)
        kegg_data_util.import_genes2ko_mappings(cur, test_kegg_dir)
        kegg_data_util.create_kegg2uniref_mappings_table(cur)
        kegg_data_util.load_diamond_search_results(
            cur, kegg_diamond_output, 95.0, 5)

        # --- SEED-to-KEGG mappings ---
        seed_data_util.create_seed2kegg_mappings_table(cur)
        data_analysis.fill_seed2kegg_mappings_table(
            cur, seed2kegg_diamond_output, 95.0, 5)

        # --- Collections ---
        db_utils.create_collections_table(cur)
        db_utils.create_collection2function_table(cur)
        data_analysis.import_collection_tsv(
            cur, collection_file, 'nitrogen_test', 'test info', '0')
Esempio n. 5
0
def main():
    """Recreate the KEGG tables in the database and import the KEGG data.

    Command-line arguments come from ``get_args()``; ``args.kegg_dir`` is the
    KEGG data directory and ``args.db`` is the database to connect to.

    The process exits with status 1 if the KEGG directory fails validation.
    Changes are committed only after every step succeeds, and the connection
    is closed whether or not a step raises.
    """
    args = get_args()

    # Check if KEGG data directory contains all required files
    if not kegg_data_util.kegg_dir_is_valid(args.kegg_dir):
        print('Some required files are missing from ', args.kegg_dir,
              '. Data import failed.')
        sys.exit(1)

    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()

        # Prepare database
        print('Drop genes2ko table...')
        kegg_data_util.drop_kegg_genes2ko_table(c)
        print('Drop KEGG genes table...')
        kegg_data_util.drop_kegg_genes_table(c)
        print('Drop KEGG genomes table...')
        kegg_data_util.drop_kegg_genomes_table(c)
        print('Drop KEGG orthologs table...')
        kegg_data_util.drop_kegg_orthologs_table(c)
        print('Drop database indices...')
        kegg_data_util.drop_indices(c)
        print('Create KEGG orthologs table...')
        kegg_data_util.create_kegg_orthologs_table(c)
        print('Create KEGG genomes table...')
        kegg_data_util.create_kegg_genomes_table(c)
        print('Create KEGG genes table...')
        kegg_data_util.create_kegg_genes_table(c)
        print('Create genes2ko table...')
        kegg_data_util.create_kegg_genes2ko_table(c)

        # Import data
        kegg_data_util.import_kegg_orthologs_list(
            c, os.path.join(args.kegg_dir, 'kegg_ko_list.txt'))
        kegg_data_util.import_kegg_genomes_list(
            c, os.path.join(args.kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.import_kegg_genes(
            c, os.path.join(args.kegg_dir, 'ko_proteins_nr.fasta'))
        kegg_data_util.import_genes2ko_mappings(c, args.kegg_dir)

        conn.commit()
    finally:
        # Release the connection even when a drop/create/import step fails.
        conn.close()
    def test_export_kegg_unmapped_proteins(self):
        """Export unmapped KEGG proteins and check the first output line.

        Builds the UniRef, SEED and KEGG tables and the seed2kegg mappings
        (checking the intermediate row counts), calls
        ``data_analysis.export_kegg_unmapped_proteins`` to write ``out.fasta``
        under ``data_dir``, and asserts on the first 15 characters of that
        file's first line.
        """
        kegg_fasta = os.path.join(test_kegg_dir, 'ko_proteins_nr.fasta')
        out_fasta = os.path.join(data_dir, 'out.fasta')

        uniref_data_util.create_uniref_proteins_table(self.cursor)
        uniref_data_util.import_uniref_fasta(self.cursor, uniref_fasta_file)
        uniref_data_util.create_uniref_proteins_indices(self.cursor)
        seed_data_util.create_tables(self.cursor)
        seed_data_util.import_seed_genomes(self.cursor, seed_genome_file)
        seed_data_util.import_seed_genes(self.cursor, seed_gene_dir)
        seed_data_util.create_seed2uniref_mappings_table(self.cursor)
        seed_data_util.load_diamond_search_results(self.cursor,
                                                   seed_diamond_output, 95.0,
                                                   5)
        self.cursor.execute('SELECT COUNT(*) FROM seed2uniref_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 3)
        kegg_data_util.create_kegg_genomes_table(self.cursor)
        kegg_data_util.import_kegg_genomes_list(
            self.cursor, os.path.join(test_kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.create_kegg_genes_table(self.cursor)
        kegg_data_util.import_kegg_genes(self.cursor, kegg_fasta)
        kegg_data_util.create_kegg2uniref_mappings_table(self.cursor)
        kegg_data_util.load_diamond_search_results(self.cursor,
                                                   kegg_diamond_output, 95.0,
                                                   5)
        self.cursor.execute('SELECT COUNT(*) FROM kegg2uniref_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 2)

        seed_data_util.create_seed2kegg_mappings_table(self.cursor)
        data_analysis.fill_seed2kegg_mappings_table(self.cursor,
                                                    seed2kegg_diamond_output,
                                                    95.0, 5)
        self.cursor.execute('SELECT COUNT(*) FROM seed2kegg_mappings')
        self.assertEqual(self.cursor.fetchone()[0], 4)

        data_analysis.export_kegg_unmapped_proteins(
            self.cursor, kegg_fasta, out_fasta)
        # The with-block closes the file on exit, so no explicit check of
        # f.closed is needed.
        with open(out_fasta, 'r') as f:
            line = f.readline()
        self.assertEqual(line[:15], '>dml:Dmul_28240')