def main():
    """Rebuild the seed2uniref_mappings table in the SEED database.

    Reads options from ``get_args()``, opens the database given by
    ``args.seed_db`` and attaches ``args.uniref_db`` to the same cursor
    under the name ``uniref_proteins``.  The seed2uniref_mappings table is
    dropped and recreated, then filled in two passes: first the
    identical-hash lookup, then the DIAMOND search results read from
    ``args.diamond_out``.

    The connection is always closed, even when a step raises; changes are
    committed only when every step succeeded.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()
        db_utils.attach_local_database(c, args.uniref_db, 'uniref_proteins')

        # Prepare database
        print('Drop seed2uniref_mappings table...')
        seed_data_util.drop_seed2uniref_mappings_table(c)
        print('Create seed2uniref_mappings table...')
        seed_data_util.create_seed2uniref_mappings_table(c)

        # Import data
        print('Find genes with identical hashes...')
        data_analysis.find_seed2uniref_identical_mappings(c)
        print('Get genes from DIAMOND output...')
        # NOTE(review): 95.0 and 5 are passed straight through to the loader;
        # presumably an identity cutoff and a length tolerance - confirm
        # against seed_data_util.load_diamond_search_results.
        seed_data_util.load_diamond_search_results(
            c, args.diamond_out, 95.0, 5)

        # Write changes.  The message has no trailing newline, so flush it
        # explicitly; otherwise it only shows up after the commit finished.
        print('Saving database...', end='', flush=True)
        conn.commit()
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()
    print('done.')
Ejemplo n.º 2
0
 def setUp(self):
     """Build the SEED and UniRef fixture tables used by the tests.

     Opens ``seed_db_file``, keeps the connection and cursor on the test
     instance (``self.conn`` / ``self.cursor``), loads the SEED genomes and
     genes test data, then attaches ``uniref_db_file`` under the name
     ``uniref_proteins`` and loads the UniRef FASTA test data.
     """
     self.conn = db_utils.connect_local_database(seed_db_file)
     self.cursor = cur = self.conn.cursor()

     # SEED side: genomes first, then genes.
     seed_data_util.create_seed_genomes_table(cur)
     genomes_path = os.path.join(data_dir, 'test_seed_genomes.txt')
     seed_data_util.import_seed_genomes(cur, genomes_path)
     seed_data_util.create_seed_genes_table(cur)
     seed_data_util.import_seed_genes(cur, test_seed_dir)

     # UniRef side: attach, create table, load sequences, build indices.
     db_utils.attach_local_database(cur, uniref_db_file, 'uniref_proteins')
     uniref_data_util.create_uniref_proteins_table(cur)
     uniref_data_util.import_uniref_fasta(cur, uniref_fasta_file)
     uniref_data_util.create_uniref_proteins_indices(cur)
def main():
    """Export the KEGG proteins reported as unmapped.

    Opens the database given by ``args.kegg_db``, attaches ``args.seed_db``
    under the name ``seed_data`` and hands ``args.infile`` and
    ``args.outfile`` to ``data_analysis.export_kegg_unmapped_proteins``.
    The connection is closed without a commit.
    """
    options = get_args()

    # Connect to the KEGG database and make the SEED database reachable
    # through the same cursor.
    connection = db_utils.connect_local_database(options.kegg_db)
    cursor = connection.cursor()
    db_utils.attach_local_database(cursor, options.seed_db, 'seed_data')

    print('Finding unmapped proteins...')
    data_analysis.export_kegg_unmapped_proteins(
        cursor, options.infile, options.outfile)

    connection.close()
    print('done.')
Ejemplo n.º 4
0
def main():
    """Import a collection from a TSV file into the SEED database.

    Opens the database given by ``args.seed_db``, attaches ``args.kegg_db``
    under the name ``kegg_data``, runs the table-creation helpers for the
    collections and collection2function tables, and then imports
    ``args.infile`` together with the ``args.name``, ``args.info`` and
    ``args.ver`` values.  Changes are committed before the connection is
    closed.
    """
    options = get_args()

    # Open the SEED database and attach the KEGG database to it.
    connection = db_utils.connect_local_database(options.seed_db)
    cursor = connection.cursor()
    db_utils.attach_local_database(cursor, options.kegg_db, 'kegg_data')

    # Run the table-creation helpers before importing.
    db_utils.create_collections_table(cursor)
    db_utils.create_collection2function_table(cursor)

    data_analysis.import_collection_tsv(
        cursor, options.infile, options.name, options.info, options.ver)

    # Persist and release.
    connection.commit()
    connection.close()
    print('done.')
Ejemplo n.º 5
0
def main():
    """Recreate the UniRef proteins table from a FASTA file.

    Opens the database given by ``args.db``, drops the existing tables and
    indices, creates the uniref_proteins table, imports ``args.fasta`` and
    builds the indices afterwards.  Changes are committed before the
    connection is closed.
    """
    options = get_args()

    connection = db_utils.connect_local_database(options.db)
    cursor = connection.cursor()

    # Start from a clean slate.
    uniref_data_util.drop_tables(cursor)
    uniref_data_util.drop_indices(cursor)

    # Load the data; indices are created once the import has finished.
    uniref_data_util.create_uniref_proteins_table(cursor)
    uniref_data_util.import_uniref_fasta(cursor, options.fasta)
    uniref_data_util.create_uniref_proteins_indices(cursor)

    connection.commit()
    connection.close()
Ejemplo n.º 6
0
def main():
    """Recreate the SEED tables and import the SEED data files.

    Opens the database given by ``args.db``, drops all tables and indices,
    recreates the tables and imports, in order: functional roles
    (``args.seed_roles_file``), genomes (``args.seed_genome_file``), genes
    (``args.seed_prot_dir``) and gene-to-role mappings
    (``args.seed_roles_dir`` with ``args.comment``).  Changes are committed
    before the connection is closed.
    """
    options = get_args()

    # Connect and reset the schema.
    connection = db_utils.connect_local_database(options.db)
    cursor = connection.cursor()
    seed_data_util.drop_tables(cursor)
    seed_data_util.drop_all_indices(cursor)
    seed_data_util.create_tables(cursor)

    # Load the data sets one after another.
    seed_data_util.import_seed_functional_roles_table(
        cursor, options.seed_roles_file)
    seed_data_util.import_seed_genomes(cursor, options.seed_genome_file)
    seed_data_util.import_seed_genes(cursor, options.seed_prot_dir)
    seed_data_util.import_seed_gene2roles_mapping(
        cursor, options.seed_roles_dir, options.comment)

    # Persist and release.
    connection.commit()
    connection.close()
Ejemplo n.º 7
0
def main():
    """Recreate the KEGG tables and import the KEGG data directory.

    Reads options from ``get_args()``.  If
    ``kegg_data_util.kegg_dir_is_valid(args.kegg_dir)`` reports the
    directory as invalid, an error message is printed and the process
    exits with status 1.  Otherwise the database given by ``args.db`` is
    opened, the genes2ko, genes, genomes and orthologs tables and the
    indices are dropped, the tables are recreated, and the ortholog list,
    genome list, gene sequences and gene-to-KO mappings are imported from
    ``args.kegg_dir``.

    The connection is always closed, even when a step raises; changes are
    committed only when every step succeeded.
    """
    args = get_args()

    # Check if KEGG data directory contains all required files
    if not kegg_data_util.kegg_dir_is_valid(args.kegg_dir):
        # sep='' keeps the directory name from being padded with stray
        # spaces ("from  <dir> . Data import failed.").
        print('Some required files are missing from ', args.kegg_dir,
              '. Data import failed.', sep='')
        sys.exit(1)

    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()

        # Prepare database
        print('Drop genes2ko table...')
        kegg_data_util.drop_kegg_genes2ko_table(c)
        print('Drop KEGG genes table...')
        kegg_data_util.drop_kegg_genes_table(c)
        print('Drop KEGG genomes table...')
        kegg_data_util.drop_kegg_genomes_table(c)
        print('Drop KEGG orthologs table...')
        kegg_data_util.drop_kegg_orthologs_table(c)
        print('Drop database indices...')
        kegg_data_util.drop_indices(c)
        print('Create KEGG orthologs table...')
        kegg_data_util.create_kegg_orthologs_table(c)
        print('Create KEGG genomes table...')
        kegg_data_util.create_kegg_genomes_table(c)
        print('Create KEGG genes table...')
        kegg_data_util.create_kegg_genes_table(c)
        print('Create genes2ko table...')
        kegg_data_util.create_kegg_genes2ko_table(c)

        # Import data
        kegg_data_util.import_kegg_orthologs_list(
            c, os.path.join(args.kegg_dir, 'kegg_ko_list.txt'))
        kegg_data_util.import_kegg_genomes_list(
            c, os.path.join(args.kegg_dir, 'kegg_genomes.txt'))
        kegg_data_util.import_kegg_genes(
            c, os.path.join(args.kegg_dir, 'ko_proteins_nr.fasta'))
        kegg_data_util.import_genes2ko_mappings(c, args.kegg_dir)

        conn.commit()
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()
def main():
    """Rebuild the seed2kegg_mappings table in the SEED database.

    Reads options from ``get_args()``, opens the database given by
    ``args.seed_db`` and attaches ``args.kegg_db`` as ``kegg_data`` and
    ``args.uniref_db`` as ``uniref_proteins``.  The seed2kegg_mappings
    table is dropped, recreated and populated from ``args.diamond_out``.

    The connection is always closed, even when a step raises; changes are
    committed only when every step succeeded.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()
        db_utils.attach_local_database(c, args.kegg_db, 'kegg_data')
        db_utils.attach_local_database(c, args.uniref_db, 'uniref_proteins')

        # Prepare database
        seed_data_util.drop_seed2kegg_mappings_table(c)
        print('Creating seed2kegg_mappings table...')
        seed_data_util.create_seed2kegg_mappings_table(c)
        print('Populating seed2kegg_mappings table...')
        # NOTE(review): 95.0 and 5 are passed straight through; presumably
        # an identity cutoff and a length tolerance - confirm against
        # data_analysis.fill_seed2kegg_mappings_table.
        data_analysis.fill_seed2kegg_mappings_table(
            c, args.diamond_out, 95.0, 5)

        # Write changes.  The message has no trailing newline, so flush it
        # explicitly; otherwise it only shows up after the commit finished.
        print('Saving database...', end='', flush=True)
        conn.commit()
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()
    print('done.')
Ejemplo n.º 9
0
def main():
    """Apply SEED annotation corrections from an input file.

    Reads options from ``get_args()``, opens the database given by
    ``args.seed_db``, creates the gene2role_changes table, passes
    ``args.infile`` and ``args.comment`` to
    ``data_analysis.correct_seed_annotations`` and finally creates the
    indices for the gene2role_changes table.

    The connection is always closed, even when a step raises; changes are
    committed only when every step succeeded.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()

        # Prepare database
        print('Creating gene2role_changes table...')
        seed_data_util.create_gene2role_changes_table(c)

        # Import data
        print('Verifying input file and making changes in DB...')
        data_analysis.correct_seed_annotations(c, args.infile, args.comment)
        seed_data_util.create_gene2role_changes_indices(c)

        # Write changes.  The message has no trailing newline, so flush it
        # explicitly; otherwise it only shows up after the commit finished.
        print('Saving database...', end='', flush=True)
        conn.commit()
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()
    print('done.')
Ejemplo n.º 10
0
 def setUp(self):
     """Open the test database and attach the UniRef database to it.

     Stores the connection in ``self.conn`` and its cursor in
     ``self.cursor``, then attaches ``uniref_db_file`` under the name
     ``uniref_proteins``.
     NOTE(review): this snippet may be cut off here - confirm no further
     fixture steps follow.
     """
     self.conn = db_utils.connect_local_database(db_file)
     self.cursor = self.conn.cursor()
     db_utils.attach_local_database(self.cursor, uniref_db_file,
                                    'uniref_proteins')