def main():
    """Rebuild the seed2uniref_mappings table in the SEED database.

    Connects to the database named by ``args.seed_db``, attaches
    ``args.uniref_db`` under the schema name ``uniref_proteins``, drops and
    recreates the ``seed2uniref_mappings`` table, fills it from identical-hash
    matches and from the DIAMOND output file ``args.diamond_out``, then
    commits.

    The connection is closed even if one of the steps raises, so a failed run
    does not leave the database handle open; in that case nothing is
    committed.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()
        db_utils.attach_local_database(c, args.uniref_db, 'uniref_proteins')

        # Prepare database
        print('Drop seed2uniref_mappings table...')
        seed_data_util.drop_seed2uniref_mappings_table(c)
        print('Create seed2uniref_mappings table...')
        seed_data_util.create_seed2uniref_mappings_table(c)

        # Import data
        print('Find genes with identical hashes...')
        data_analysis.find_seed2uniref_identical_mappings(c)
        print('Get genes from DIAMOND output...')
        # NOTE(review): 95.0 and 5 are passed through unchanged; presumably
        # an identity cutoff and a second threshold -- confirm against
        # seed_data_util.load_diamond_search_results.
        seed_data_util.load_diamond_search_results(
            c, args.diamond_out, 95.0, 5)

        # Write changes
        print('Saving database...', end='')
        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
    print('done.')
def setUp(self):
    """Prepare the SEED and UniRef fixture tables used by the tests.

    Connects to ``seed_db_file``, creates and fills the SEED genomes and
    genes tables, attaches ``uniref_db_file`` as ``uniref_proteins`` and then
    creates, fills and indexes the UniRef proteins table. The connection and
    cursor are kept on ``self.conn`` and ``self.cursor``.
    """
    self.conn = db_utils.connect_local_database(seed_db_file)
    cursor = self.cursor = self.conn.cursor()

    # SEED side: genomes first, then genes.
    seed_data_util.create_seed_genomes_table(cursor)
    genomes_path = os.path.join(data_dir, 'test_seed_genomes.txt')
    seed_data_util.import_seed_genomes(cursor, genomes_path)
    seed_data_util.create_seed_genes_table(cursor)
    seed_data_util.import_seed_genes(cursor, test_seed_dir)

    # UniRef side: attach the second database, then load and index it.
    db_utils.attach_local_database(
        cursor, uniref_db_file, 'uniref_proteins')
    uniref_data_util.create_uniref_proteins_table(cursor)
    uniref_data_util.import_uniref_fasta(cursor, uniref_fasta_file)
    uniref_data_util.create_uniref_proteins_indices(cursor)
def main():
    """Export KEGG proteins that have no mapping.

    Connects to the database named by ``args.kegg_db``, attaches
    ``args.seed_db`` under the schema name ``seed_data`` and calls
    ``data_analysis.export_kegg_unmapped_proteins`` with ``args.infile`` and
    ``args.outfile``. Nothing is committed by this function.

    The connection is closed even if the export raises.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.kegg_db)
    try:
        c = conn.cursor()
        db_utils.attach_local_database(c, args.seed_db, 'seed_data')

        print('Finding unmapped proteins...')
        data_analysis.export_kegg_unmapped_proteins(
            c, args.infile, args.outfile)
    finally:
        # Always release the connection, including on failure.
        conn.close()
    print('done.')
def main():
    """Import a collection from a TSV file into the SEED database.

    Connects to the database named by ``args.seed_db``, attaches
    ``args.kegg_db`` under the schema name ``kegg_data``, calls the
    ``db_utils`` helpers that create the collections and collection2function
    tables, imports ``args.infile`` with the given name, info and version,
    and commits.

    The connection is closed even if a step raises; in that case nothing is
    committed.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()
        db_utils.attach_local_database(c, args.kegg_db, 'kegg_data')

        db_utils.create_collections_table(c)
        db_utils.create_collection2function_table(c)
        data_analysis.import_collection_tsv(
            c, args.infile, args.name, args.info, args.ver)
        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
    print('done.')
def main():
    """Rebuild the UniRef proteins table from a FASTA file.

    Connects to the database named by ``args.db``, drops the existing tables
    and indices through ``uniref_data_util``, recreates the UniRef proteins
    table, imports ``args.fasta``, creates the indices and commits.

    The connection is closed even if a step raises; in that case nothing is
    committed.
    """
    args = get_args()

    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()

        # Start from a clean slate.
        uniref_data_util.drop_tables(c)
        uniref_data_util.drop_indices(c)

        # Indices are created only after the import, as in the original order.
        uniref_data_util.create_uniref_proteins_table(c)
        uniref_data_util.import_uniref_fasta(c, args.fasta)
        uniref_data_util.create_uniref_proteins_indices(c)

        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
def main():
    """Rebuild the SEED tables and import SEED data files.

    Connects to the database named by ``args.db``, drops all tables and
    indices through ``seed_data_util``, recreates the tables, then imports
    functional roles, genomes, genes and gene-to-role mappings from the paths
    given on the command line, and commits.

    The connection is closed even if a step raises; in that case nothing is
    committed.
    """
    args = get_args()

    # Open database and prepare tables
    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()
        seed_data_util.drop_tables(c)
        seed_data_util.drop_all_indices(c)
        seed_data_util.create_tables(c)

        # Import data
        seed_data_util.import_seed_functional_roles_table(
            c, args.seed_roles_file)
        seed_data_util.import_seed_genomes(c, args.seed_genome_file)
        seed_data_util.import_seed_genes(c, args.seed_prot_dir)
        seed_data_util.import_seed_gene2roles_mapping(
            c, args.seed_roles_dir, args.comment)

        # Save changes
        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
def _rebuild_kegg_tables(c):
    """Drop and recreate the KEGG tables, announcing each step on stdout."""
    steps = (
        ('Drop genes2ko table...',
         kegg_data_util.drop_kegg_genes2ko_table),
        ('Drop KEGG genes table...',
         kegg_data_util.drop_kegg_genes_table),
        ('Drop KEGG genomes table...',
         kegg_data_util.drop_kegg_genomes_table),
        ('Drop KEGG orthologs table...',
         kegg_data_util.drop_kegg_orthologs_table),
        ('Drop database indices...',
         kegg_data_util.drop_indices),
        ('Create KEGG orthologs table...',
         kegg_data_util.create_kegg_orthologs_table),
        ('Create KEGG genomes table...',
         kegg_data_util.create_kegg_genomes_table),
        ('Create KEGG genes table...',
         kegg_data_util.create_kegg_genes_table),
        ('Create genes2ko table...',
         kegg_data_util.create_kegg_genes2ko_table),
    )
    # Order matters: it mirrors the original drop-then-create sequence.
    for message, step in steps:
        print(message)
        step(c)


def _import_kegg_files(c, kegg_dir):
    """Import the KEGG data files found in *kegg_dir* using cursor *c*."""
    kegg_data_util.import_kegg_orthologs_list(
        c, os.path.join(kegg_dir, 'kegg_ko_list.txt'))
    kegg_data_util.import_kegg_genomes_list(
        c, os.path.join(kegg_dir, 'kegg_genomes.txt'))
    kegg_data_util.import_kegg_genes(
        c, os.path.join(kegg_dir, 'ko_proteins_nr.fasta'))
    kegg_data_util.import_genes2ko_mappings(c, kegg_dir)


def main():
    """Rebuild the KEGG database from the files in ``args.kegg_dir``.

    Exits with status 1, before touching the database, when
    ``kegg_data_util.kegg_dir_is_valid`` rejects the directory. Otherwise
    connects to the database named by ``args.db``, drops and recreates the
    KEGG tables, imports the data files and commits.

    The connection is closed even if a step raises; in that case nothing is
    committed.
    """
    args = get_args()

    # Check if KEGG data directory contains all required files.
    # NOTE(review): assumes kegg_dir_is_valid returns a bool; the original
    # compared the result with ``== False`` -- confirm against the helper.
    if not kegg_data_util.kegg_dir_is_valid(args.kegg_dir):
        print('Some required files are missing from ', args.kegg_dir,
              '. Data import failed.')
        sys.exit(1)

    conn = db_utils.connect_local_database(args.db)
    try:
        c = conn.cursor()

        # Prepare database
        _rebuild_kegg_tables(c)

        # Import data
        _import_kegg_files(c, args.kegg_dir)

        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
def main():
    """Rebuild and populate the seed2kegg_mappings table.

    Connects to the database named by ``args.seed_db``, attaches
    ``args.kegg_db`` as ``kegg_data`` and ``args.uniref_db`` as
    ``uniref_proteins``, drops and recreates the ``seed2kegg_mappings``
    table, fills it using the DIAMOND output file ``args.diamond_out``, and
    commits.

    The connection is closed even if a step raises; in that case nothing is
    committed.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()
        db_utils.attach_local_database(c, args.kegg_db, 'kegg_data')
        db_utils.attach_local_database(c, args.uniref_db, 'uniref_proteins')

        # Prepare database
        seed_data_util.drop_seed2kegg_mappings_table(c)
        print('Creating seed2kegg_mappings table...')
        seed_data_util.create_seed2kegg_mappings_table(c)

        print('Populating seed2kegg_mappings table...')
        # NOTE(review): 95.0 and 5 are passed through unchanged; presumably
        # an identity cutoff and a second threshold -- confirm against
        # data_analysis.fill_seed2kegg_mappings_table.
        data_analysis.fill_seed2kegg_mappings_table(
            c, args.diamond_out, 95.0, 5)

        # Write changes
        print('Saving database...', end='')
        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
    print('done.')
def main():
    """Apply SEED annotation corrections listed in an input file.

    Connects to the database named by ``args.seed_db``, creates the
    ``gene2role_changes`` table, passes ``args.infile`` and ``args.comment``
    to ``data_analysis.correct_seed_annotations``, creates the table's
    indices and commits.

    The connection is closed even if a step raises; in that case nothing is
    committed.
    """
    args = get_args()

    # Open database
    conn = db_utils.connect_local_database(args.seed_db)
    try:
        c = conn.cursor()

        # Prepare database
        print('Creating gene2role_changes table...')
        seed_data_util.create_gene2role_changes_table(c)

        # Import data
        print('Verifying input file and making changes in DB...')
        data_analysis.correct_seed_annotations(c, args.infile, args.comment)
        # Indices are created only after the changes, as in the original.
        seed_data_util.create_gene2role_changes_indices(c)

        # Write changes
        print('Saving database...', end='')
        conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()
    print('done.')
def setUp(self):
    """Connect to the test database and attach the UniRef database.

    Stores the connection on ``self.conn`` and its cursor on
    ``self.cursor``; ``uniref_db_file`` is attached under the schema name
    ``uniref_proteins``.
    """
    connection = db_utils.connect_local_database(db_file)
    self.conn = connection
    self.cursor = connection.cursor()
    db_utils.attach_local_database(
        self.cursor,
        uniref_db_file,
        'uniref_proteins',
    )