예제 #1
0
def main(argv=None):
    """Run the taxonomy-assignment command-line workflow.

    Parses options, loads the taxonomy cache, reads the FASTA and BLAST
    input files, assigns every sequence, and writes ``Full_Taxonomy.txt``,
    ``Standard_Taxonomy.txt`` and ``brocc.log`` into the output directory.
    The taxonomy cache is saved once all sequences have been processed.

    Args:
        argv: Argument list forwarded unchanged to ``parse_args``.
    """
    opts = parse_args(argv)

    # Configure

    log_level = logging.DEBUG if opts.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    taxa_db = NcbiEutils(opts.cache_fp)
    taxa_db.load_cache()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(opts.min_cover, opts.min_species_id, opts.min_genus_id,
                        opts.min_id, consensus_thresholds, opts.max_generic,
                        taxa_db)

    # Read input files

    with open(opts.fasta_file) as f:
        # Materialize the records because the file is closed before use.
        sequences = list(iter_fasta(f))

    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files

    # makedirs (not mkdir) so that a nested output path can be created too.
    if not os.path.exists(opts.output_directory):
        os.makedirs(opts.output_directory)
    full_taxa_fp = os.path.join(opts.output_directory, "Full_Taxonomy.txt")
    standard_taxa_fp = os.path.join(
        opts.output_directory, "Standard_Taxonomy.txt")
    log_fp = os.path.join(opts.output_directory, "brocc.log")

    # The with-statement closes all three files even when an assignment
    # raises part-way through the loop.
    with open(full_taxa_fp, "w") as output_file, \
            open(standard_taxa_fp, "w") as standard_taxa_file, \
            open(log_fp, "w") as log_file:
        log_file.write(
            "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
            "Classification\n")

        # Do the work

        for name, seq in sequences:
            seq_hits = blast_hits[name]
            # This is where the magic happens
            a = assigner.assign(name, seq, seq_hits)

            output_file.write(a.format_for_full_taxonomy())
            standard_taxa_file.write(a.format_for_standard_taxonomy())
            log_file.write(a.format_for_log())

    # Write cache (only reached when every sequence was processed)

    taxa_db.save_cache()
예제 #2
0
파일: command.py 프로젝트: eclarke/brocc
def main(argv=None):
    """Entry point: assign taxonomy to each input sequence and write reports.

    Steps: parse options, configure logging, load the taxonomy cache, read
    the FASTA and BLAST inputs, run the assigner on every sequence, write
    the three report files into the output directory, then save the cache.

    Args:
        argv: Argument list handed straight to ``parse_args``.
    """
    opts = parse_args(argv)

    # Configure

    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    taxa_db = NcbiEutils(opts.cache_fp)
    taxa_db.load_cache()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(
        opts.min_cover, opts.min_species_id, opts.min_genus_id, opts.min_id,
        consensus_thresholds, opts.max_generic, taxa_db)

    # Read input files

    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))

    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files

    out_dir = opts.output_directory
    if not os.path.exists(out_dir):
        # makedirs also creates missing parent directories, which plain
        # mkdir refuses to do.
        os.makedirs(out_dir)

    # Context managers guarantee the reports are flushed and closed even if
    # an assignment fails midway.
    with open(os.path.join(out_dir, "Full_Taxonomy.txt"), 'w') \
            as output_file, \
            open(os.path.join(out_dir, "Standard_Taxonomy.txt"), "w") \
            as standard_taxa_file, \
            open(os.path.join(out_dir, "brocc.log"), "w") as log_file:
        log_file.write(
            "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
            "Classification\n")

        # Do the work

        for name, seq in sequences:
            seq_hits = blast_hits[name]
            # This is where the magic happens
            a = assigner.assign(name, seq, seq_hits)

            output_file.write(a.format_for_full_taxonomy())
            standard_taxa_file.write(a.format_for_standard_taxonomy())
            log_file.write(a.format_for_log())

    # Write cache (skipped, as before, if an error aborted the run)

    taxa_db.save_cache()
예제 #3
0
 def test_missing_read(self):
     # Looking up a read name with no entry in the parsed result is expected
     # to yield an empty list.
     hits_by_read = read_blast(StringIO(normal_output))
     missing_hits = hits_by_read['sdlkj']
     self.assertEqual(missing_hits, [])
예제 #4
0
 def test_malformed_output(self):
     # Parse the malformed fixture and check the fields of the first hit
     # recorded for query '0 E7_168192'.
     hits_by_query = read_blast(StringIO(malformed_output))
     query_hits = hits_by_query['0 E7_168192']
     first_hit = query_hits[0]
     self.assertEqual(first_hit.accession, "GQ513762.1")
     self.assertEqual(first_hit.pct_id, 98.74)
     self.assertEqual(first_hit.length, 159)
예제 #5
0
 def test_malformed_output(self):
     # Parse the malformed fixture and check the fields of the first hit
     # recorded for query '0 E7_168192'.
     hits_by_query = read_blast(StringIO(malformed_output))
     query_hits = hits_by_query['0 E7_168192']
     first_hit = query_hits[0]
     self.assertEqual(first_hit.gi, "259100874")
     self.assertEqual(first_hit.pct_id, 98.74)
     self.assertEqual(first_hit.length, 159)
예제 #6
0
def main(argv=None):
    """Run the taxonomy-assignment command-line workflow.

    Parses options, picks a taxonomy backend (``NcbiLocal`` when the
    configured taxonomy database path exists, otherwise ``NcbiEutils`` with
    a notice on stderr), reads the FASTA and BLAST inputs, assigns every
    sequence, and writes ``Standard_Taxonomy.txt``, ``brocc.log`` and
    ``voting_log.txt`` into the output directory.

    Args:
        argv: Argument list forwarded unchanged to ``parse_args``.
    """
    opts = parse_args(argv)

    # Configure

    log_level = logging.DEBUG if opts.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    if os.path.exists(opts.taxonomy_db):
        taxa_db = NcbiLocal(opts.taxonomy_db)
    else:
        sys.stderr.write(
            "Did not detect a local copy of the NCBI taxonomy.\n"
            "Using NCBI EUtils to get taxonomic info instead.\n\n"
            "The NCBI taxonomy can be dowloaded with the script "
            "create_local_taxonomy_db.py\n"
            "This will greatly speed up the assignment process.\n")
        taxa_db = NcbiEutils()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(opts.min_cover, opts.min_species_id, opts.min_genus_id,
                        opts.min_id, consensus_thresholds,
                        opts.min_winning_votes, taxa_db)

    # Read input files

    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))

    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files

    # makedirs (not mkdir) so that a nested output path can be created too.
    if not os.path.exists(opts.output_directory):
        os.makedirs(opts.output_directory)
    standard_taxa_fp = os.path.join(
        opts.output_directory, "Standard_Taxonomy.txt")
    log_fp = os.path.join(opts.output_directory, "brocc.log")

    # The with-statement closes both reports even when an assignment raises.
    with open(standard_taxa_fp, "w") as standard_taxa_file, \
            open(log_fp, "w") as log_file:
        log_file.write(
            "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
            "Classification\n")

        # Set up log for voting details
        vote_logger = logging.getLogger("brocc.votes")
        vote_logger.setLevel(logging.DEBUG)
        vote_handler = logging.FileHandler(
            os.path.join(opts.output_directory, "voting_log.txt"))
        vote_handler.setLevel(logging.DEBUG)
        vote_formatter = logging.Formatter('%(message)s')
        vote_handler.setFormatter(vote_formatter)
        vote_logger.addHandler(vote_handler)
        # Keep voting details out of the root logger's output.
        vote_logger.propagate = False

        try:
            # Do the work

            for name, seq in sequences:
                seq_hits = blast_hits[name]
                # This is where the magic happens
                a = assigner.assign(name, seq, seq_hits)

                standard_taxa_file.write(a.format_for_standard_taxonomy())
                log_file.write(a.format_for_log())
        finally:
            # Detach and close the handler so its file is released and a
            # later call to main() in the same process does not keep
            # writing to this run's voting log.
            vote_logger.removeHandler(vote_handler)
            vote_handler.close()
예제 #7
0
파일: test_parse.py 프로젝트: eclarke/brocc
 def test_missing_read(self):
     # Looking up a read name with no entry in the parsed result is expected
     # to yield an empty list.
     hits_by_read = read_blast(StringIO(normal_output))
     missing_hits = hits_by_read['sdlkj']
     self.assertEqual(missing_hits, [])
예제 #8
0
파일: test_parse.py 프로젝트: eclarke/brocc
 def test_malformed_output(self):
     # Parse the malformed fixture and check the fields of the first hit
     # recorded for query '0 E7_168192'.
     hits_by_query = read_blast(StringIO(malformed_output))
     query_hits = hits_by_query['0 E7_168192']
     first_hit = query_hits[0]
     self.assertEqual(first_hit.gi, "259100874")
     self.assertEqual(first_hit.pct_id, 98.74)
     self.assertEqual(first_hit.length, 159)