def main(argv=None):
    opts = parse_args(argv)

    # Configure
    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    taxa_db = NcbiEutils(opts.cache_fp)
    taxa_db.load_cache()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(
        opts.min_cover, opts.min_species_id, opts.min_genus_id,
        opts.min_id, consensus_thresholds, opts.max_generic, taxa_db)

    # Read input files
    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))
    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files
    if not os.path.exists(opts.output_directory):
        os.mkdir(opts.output_directory)
    output_file = open(
        os.path.join(opts.output_directory, "Full_Taxonomy.txt"), "w")
    standard_taxa_file = open(
        os.path.join(opts.output_directory, "Standard_Taxonomy.txt"), "w")
    log_file = open(os.path.join(opts.output_directory, "brocc.log"), "w")
    log_file.write(
        "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
        "Classification\n")

    # Do the work
    for name, seq in sequences:
        seq_hits = blast_hits[name]
        # This is where the magic happens
        a = assigner.assign(name, seq, seq_hits)
        output_file.write(a.format_for_full_taxonomy())
        standard_taxa_file.write(a.format_for_standard_taxonomy())
        log_file.write(a.format_for_log())

    # Close output files, write cache
    output_file.close()
    standard_taxa_file.close()
    log_file.close()
    taxa_db.save_cache()
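# Hypothetical sketch (not part of the original source): main() above consumes
# iter_fasta as an iterator of (name, sequence) tuples. The real iter_fasta is
# not shown here and may differ; a minimal parser with that interface could
# look like this:
def iter_fasta_sketch(f):
    name = None
    seq_parts = []
    for line in f:
        line = line.strip()
        if line.startswith(">"):
            # Emit the previous record before starting a new one
            if name is not None:
                yield name, "".join(seq_parts)
            name = line[1:]
            seq_parts = []
        elif line:
            seq_parts.append(line)
    if name is not None:
        yield name, "".join(seq_parts)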
def test_missing_read(self):
    obs = read_blast(StringIO(normal_output))
    self.assertEqual(obs['sdlkj'], [])
def test_malformed_output(self):
    obs = read_blast(StringIO(malformed_output))
    h = obs['0 E7_168192'][0]
    self.assertEqual(h.accession, "GQ513762.1")
    self.assertEqual(h.pct_id, 98.74)
    self.assertEqual(h.length, 159)
def test_malformed_output(self):
    obs = read_blast(StringIO(malformed_output))
    h = obs['0 E7_168192'][0]
    self.assertEqual(h.gi, "259100874")
    self.assertEqual(h.pct_id, 98.74)
    self.assertEqual(h.length, 159)
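# Hypothetical sketch (not in the original source): the tests above imply that
# read_blast returns a mapping from query name to a list of hit records, that
# a query with no hits maps to an empty list, and that each hit exposes an
# identifier (gi in older versions, accession in newer ones) plus pct_id and
# length. One way to model that contract, assuming standard tab-delimited
# BLAST output where the first four columns are query, subject, percent
# identity, and alignment length; the real read_blast may differ:
import collections

BlastHitSketch = collections.namedtuple(
    "BlastHitSketch", ["accession", "pct_id", "length"])

def read_blast_sketch(f):
    # defaultdict(list) gives [] for reads with no hits, as in test_missing_read
    hits = collections.defaultdict(list)
    for line in f:
        fields = line.rstrip("\n").split("\t")
        query, accession, pct_id, length = fields[:4]
        hits[query].append(
            BlastHitSketch(accession, float(pct_id), int(length)))
    return hits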
def main(argv=None):
    opts = parse_args(argv)

    # Configure
    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    if os.path.exists(opts.taxonomy_db):
        taxa_db = NcbiLocal(opts.taxonomy_db)
    else:
        sys.stderr.write(
            "Did not detect a local copy of the NCBI taxonomy.\n"
            "Using NCBI EUtils to get taxonomic info instead.\n\n"
            "The NCBI taxonomy can be downloaded with the script "
            "create_local_taxonomy_db.py\n"
            "This will greatly speed up the assignment process.\n")
        taxa_db = NcbiEutils()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(
        opts.min_cover, opts.min_species_id, opts.min_genus_id,
        opts.min_id, consensus_thresholds, opts.min_winning_votes, taxa_db)

    # Read input files
    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))
    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files
    if not os.path.exists(opts.output_directory):
        os.mkdir(opts.output_directory)
    standard_taxa_file = open(
        os.path.join(opts.output_directory, "Standard_Taxonomy.txt"), "w")
    log_file = open(os.path.join(opts.output_directory, "brocc.log"), "w")
    log_file.write(
        "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
        "Classification\n")

    # Set up log for voting details
    vote_logger = logging.getLogger("brocc.votes")
    vote_logger.setLevel(logging.DEBUG)
    vote_handler = logging.FileHandler(
        os.path.join(opts.output_directory, "voting_log.txt"))
    vote_handler.setLevel(logging.DEBUG)
    vote_formatter = logging.Formatter("%(message)s")
    vote_handler.setFormatter(vote_formatter)
    vote_logger.addHandler(vote_handler)
    vote_logger.propagate = False

    # Do the work
    for name, seq in sequences:
        seq_hits = blast_hits[name]
        # This is where the magic happens
        a = assigner.assign(name, seq, seq_hits)
        standard_taxa_file.write(a.format_for_standard_taxonomy())
        log_file.write(a.format_for_log())

    # Close output files
    standard_taxa_file.close()
    log_file.close()
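# Hypothetical usage note (not in the original source): the "brocc.votes"
# logger configured in main() above writes DEBUG messages to voting_log.txt
# and does not propagate to the root logger, so assignment code elsewhere can
# record per-sequence voting detail simply by logging to that named logger,
# for example:
#
#     vote_logger = logging.getLogger("brocc.votes")
#     vote_logger.debug("%s: %d of %d votes for %s",
#                       seq_name, winner_votes, votes_cast, winner_name)
#
# The variable names here (seq_name, winner_votes, votes_cast, winner_name)
# are illustrative only; they mirror the columns written to brocc.log.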