Python NcbiEutils Exemples, brocclib.get_xml.NcbiEutils Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : command.py Projet : eclarke/brocc

def main(argv=None):
    """Run the BROCC command-line workflow.

    Parses options with ``parse_args``, configures logging, builds an
    ``Assigner`` backed by an ``NcbiEutils`` instance whose cache is loaded
    from ``opts.cache_fp``, reads the FASTA and BLAST inputs, and writes one
    record per input sequence to ``Full_Taxonomy.txt``,
    ``Standard_Taxonomy.txt`` and ``brocc.log`` in the output directory.
    The taxonomy cache is saved once every sequence has been processed.

    Args:
        argv: Argument list forwarded to ``parse_args`` (default ``None``).
    """
    opts = parse_args(argv)

    # Configure

    log_level = logging.DEBUG if opts.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    taxa_db = NcbiEutils(opts.cache_fp)
    taxa_db.load_cache()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(
        opts.min_cover, opts.min_species_id, opts.min_genus_id, opts.min_id,
        consensus_thresholds, opts.max_generic, taxa_db)

    # Read input files

    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))

    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files

    out_dir = opts.output_directory
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    full_taxonomy_fp = os.path.join(out_dir, "Full_Taxonomy.txt")
    standard_taxonomy_fp = os.path.join(out_dir, "Standard_Taxonomy.txt")
    log_fp = os.path.join(out_dir, "brocc.log")

    # The context manager guarantees all three files are flushed and closed
    # even when an assignment raises part-way through the run.
    with open(full_taxonomy_fp, 'w') as output_file, \
            open(standard_taxonomy_fp, "w") as standard_taxa_file, \
            open(log_fp, "w") as log_file:
        log_file.write(
            "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
            "Classification\n")

        # Do the work

        for name, seq in sequences:
            seq_hits = blast_hits[name]
            # This is where the magic happens
            a = assigner.assign(name, seq, seq_hits)

            output_file.write(a.format_for_full_taxonomy())
            standard_taxa_file.write(a.format_for_standard_taxonomy())
            log_file.write(a.format_for_log())

    # Write cache (only reached when every sequence was processed)

    taxa_db.save_cache()

Exemple #2

0

Afficher le fichier

    def test_save_load_cache(self):
        """Check that saved lineages and taxon ids are restored on load."""
        expected_taxon_ids = {"taxon1": "b", "taxon2": "d"}
        expected_lineages = {
            "taxon1": {'class': "a", "genus": "b"},
            "taxon2": {'class': "c", "genus": "d"},
        }

        # Fill the first instance by hand and persist it.
        writer = self.db
        writer.lineages = expected_lineages
        writer.taxon_ids = expected_taxon_ids
        # NOTE(review): presumably flags the cache as needing a write --
        # confirm against NcbiEutils.save_cache.
        writer._fresh = False
        writer.save_cache()

        # A second instance reading the same file must see identical data.
        reader = NcbiEutils(self.cache_file.name)
        reader.load_cache()
        self.assertEqual(reader.lineages, expected_lineages)
        self.assertEqual(reader.taxon_ids, expected_taxon_ids)

Exemple #3

0

Afficher le fichier

class NcbiEutilsTests(unittest.TestCase):
    """Tests for ``NcbiEutils`` caching and lookup methods."""

    def setUp(self):
        """Create a temporary JSON cache file and an ``NcbiEutils`` on it."""
        self.cache_file = tempfile.NamedTemporaryFile(suffix=".json")
        # Close the temporary file after each test instead of relying on
        # garbage collection to release the handle.
        self.addCleanup(self.cache_file.close)
        self.db = NcbiEutils(self.cache_file.name)

    def test_save_load_cache(self):
        """Check that saved lineages and taxon ids are restored on load."""
        lineages = {
            "taxon1": {'class': "a", "genus": "b"},
            "taxon2": {'class': "c", "genus": "d"},
            }
        taxon_ids = {"taxon1": "b", "taxon2": "d"}
        self.db.lineages = lineages
        self.db.taxon_ids = taxon_ids
        # NOTE(review): presumably flags the cache as needing a write --
        # confirm against NcbiEutils.save_cache.
        self.db._fresh = False
        self.db.save_cache()

        # A second instance on the same file must see the same data.
        db2 = NcbiEutils(self.cache_file.name)
        db2.load_cache()
        self.assertEqual(db2.lineages, lineages)
        self.assertEqual(db2.taxon_ids, taxon_ids)

    def test_get_taxon_id(self):
        """Check the taxon id returned for a query and that it is recorded.

        NOTE(review): likely performs a live NCBI request -- confirm.
        """
        self.assertEqual(self.db.get_taxon_id("312434489"), "531911")
        self.assertEqual(self.db.taxon_ids, {"312434489": "531911"})

    def test_get_lineage(self):
        """Check the lineage returned for a taxon id and that it is recorded.

        NOTE(review): likely performs a live NCBI request -- confirm.
        """
        observed_lineage = self.db.get_lineage("531911")
        expected_lineage = {
            'Lineage': (
                'cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; '
                'Ascomycota; saccharomyceta; Pezizomycotina; leotiomyceta; '
                'sordariomyceta; Sordariomycetes; Xylariomycetidae; '
                'Xylariales; Amphisphaeriaceae; Pestalotiopsis'),
            'class': 'Sordariomycetes',
            'family': 'Amphisphaeriaceae',
            'genus': 'Pestalotiopsis',
            'kingdom': 'Fungi',
            'no rank': 'sordariomyceta',
            'order': 'Xylariales',
            'phylum': 'Ascomycota',
            'species': 'Pestalotiopsis maculiformans',
            'subclass': 'Xylariomycetidae',
            'subkingdom': 'Dikarya',
            'subphylum': 'Pezizomycotina',
            'superkingdom': 'Eukaryota',
            }
        self.assertEqual(observed_lineage, expected_lineage)
        self.assertEqual(self.db.lineages, {'531911': expected_lineage})

Exemple #4

0

Afficher le fichier

 def test_get_lineage(self):
     """Check the (name, rank) lineage returned for taxon id 531911.

     NOTE(review): likely performs a live NCBI request -- confirm.
     """
     taxon_id = "531911"
     expected_lineage = [
         ('cellular organisms', 'no rank'),
         ('Eukaryota', 'superkingdom'),
         ('Opisthokonta', 'no rank'),
         ('Fungi', 'kingdom'),
         ('Dikarya', 'subkingdom'),
         ('Ascomycota', 'phylum'),
         ('saccharomyceta', 'no rank'),
         ('Pezizomycotina', 'subphylum'),
         ('leotiomyceta', 'no rank'),
         ('sordariomyceta', 'no rank'),
         ('Sordariomycetes', 'class'),
         ('Xylariomycetidae', 'subclass'),
         ('Xylariales', 'order'),
         ('Sporocadaceae', 'family'),
         ('Pestalotiopsis', 'genus'),
         ('Pestalotiopsis maculiformans', 'species'),
     ]

     eutils = NcbiEutils()
     actual_lineage = eutils.get_lineage(taxon_id)

     self.assertEqual(actual_lineage, expected_lineage)
     # The result must also be stored on the instance, keyed by taxon id.
     self.assertEqual(eutils.lineages, {taxon_id: expected_lineage})

Exemple #5

0

Afficher le fichier

def main(argv=None):
    """Assign taxonomy to FASTA sequences from BLAST hits and write reports.

    Options come from ``parse_args``.  Taxonomy lookups go through an
    ``NcbiEutils`` instance whose cache is loaded from ``opts.cache_fp`` at
    start-up and saved after all sequences have been processed.  For every
    input sequence one record is written to each of ``Full_Taxonomy.txt``,
    ``Standard_Taxonomy.txt`` and ``brocc.log`` in ``opts.output_directory``.

    Args:
        argv: Argument list forwarded to ``parse_args`` (default ``None``).
    """
    opts = parse_args(argv)

    # Configure

    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    taxa_db = NcbiEutils(opts.cache_fp)
    taxa_db.load_cache()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(opts.min_cover, opts.min_species_id, opts.min_genus_id,
                        opts.min_id, consensus_thresholds, opts.max_generic,
                        taxa_db)

    # Read input files

    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))

    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files

    if not os.path.exists(opts.output_directory):
        os.mkdir(opts.output_directory)

    def output_path(filename):
        # Location of a report file inside the output directory.
        return os.path.join(opts.output_directory, filename)

    # Using ``with`` ensures the reports are flushed and closed even if an
    # assignment fails midway; previously the handles leaked on error.
    with open(output_path("Full_Taxonomy.txt"), 'w') as output_file, \
            open(output_path("Standard_Taxonomy.txt"), "w") \
            as standard_taxa_file, \
            open(output_path("brocc.log"), "w") as log_file:
        log_file.write(
            "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
            "Classification\n")

        # Do the work

        for name, seq in sequences:
            seq_hits = blast_hits[name]
            # This is where the magic happens
            a = assigner.assign(name, seq, seq_hits)

            output_file.write(a.format_for_full_taxonomy())
            standard_taxa_file.write(a.format_for_standard_taxonomy())
            log_file.write(a.format_for_log())

    # Write cache (only reached when every sequence was processed)

    taxa_db.save_cache()

Exemple #6

0

Afficher le fichier

 def test_get_taxon_id(self):
     """Check the taxon id returned for an accession and that it is stored.

     NOTE(review): likely performs a live NCBI request -- confirm.
     """
     accession = "HQ608011.1"
     expected_taxon_id = "531911"

     eutils = NcbiEutils()
     observed_taxon_id = eutils.get_taxon_id(accession)

     self.assertEqual(observed_taxon_id, expected_taxon_id)
     # The lookup result must also be stored on the instance.
     self.assertEqual(eutils.taxon_ids, {accession: expected_taxon_id})

Exemple #7

0

Afficher le fichier

 def setUp(self):
     """Create a temporary JSON cache file and an ``NcbiEutils`` on it."""
     self.cache_file = tempfile.NamedTemporaryFile(suffix=".json")
     # Close the temporary file after each test rather than leaving the
     # handle open until garbage collection.
     # NOTE(review): assumes the enclosing class is a unittest.TestCase.
     self.addCleanup(self.cache_file.close)
     self.db = NcbiEutils(self.cache_file.name)

Exemple #8

0

Afficher le fichier

def main(argv=None):
    """Assign taxonomy to FASTA sequences from BLAST hits and write reports.

    Options come from ``parse_args``.  When the path ``opts.taxonomy_db``
    exists an ``NcbiLocal`` taxonomy is used; otherwise a notice is written
    to stderr and ``NcbiEutils`` is used instead.  For every input sequence
    one record is written to ``Standard_Taxonomy.txt`` and ``brocc.log`` in
    ``opts.output_directory``.  Messages sent to the ``brocc.votes`` logger
    while the assignments run are written to ``voting_log.txt`` in the same
    directory.

    Args:
        argv: Argument list forwarded to ``parse_args`` (default ``None``).
    """
    opts = parse_args(argv)

    # Configure

    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    if os.path.exists(opts.taxonomy_db):
        taxa_db = NcbiLocal(opts.taxonomy_db)
    else:
        sys.stderr.write(
            "Did not detect a local copy of the NCBI taxonomy.\n"
            "Using NCBI EUtils to get taxonomic info instead.\n\n"
            "The NCBI taxonomy can be dowloaded with the script "
            "create_local_taxonomy_db.py\n"
            "This will greatly speed up the assignment process.\n")
        taxa_db = NcbiEutils()

    consensus_thresholds = [t for _, t in CONSENSUS_THRESHOLDS]
    assigner = Assigner(opts.min_cover, opts.min_species_id, opts.min_genus_id,
                        opts.min_id, consensus_thresholds,
                        opts.min_winning_votes, taxa_db)

    # Read input files

    with open(opts.fasta_file) as f:
        sequences = list(iter_fasta(f))

    with open(opts.blast_file) as f:
        blast_hits = read_blast(f)

    # Open output files

    out_dir = opts.output_directory
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    standard_taxonomy_fp = os.path.join(out_dir, "Standard_Taxonomy.txt")
    log_fp = os.path.join(out_dir, "brocc.log")

    # The context manager closes both reports even if an assignment raises.
    with open(standard_taxonomy_fp, "w") as standard_taxa_file, \
            open(log_fp, "w") as log_file:
        log_file.write(
            "Sequence\tWinner_Votes\tVotes_Cast\tGenerics_Pruned\tLevel\t"
            "Classification\n")

        # Set up log for voting details
        vote_logger = logging.getLogger("brocc.votes")
        vote_logger.setLevel(logging.DEBUG)
        vote_handler = logging.FileHandler(
            os.path.join(out_dir, "voting_log.txt"))
        vote_handler.setLevel(logging.DEBUG)
        vote_formatter = logging.Formatter('%(message)s')
        vote_handler.setFormatter(vote_formatter)
        vote_logger.addHandler(vote_handler)
        # Keep vote details out of the root logger's output.
        vote_logger.propagate = False

        try:
            # Do the work

            for name, seq in sequences:
                seq_hits = blast_hits[name]
                # This is where the magic happens
                a = assigner.assign(name, seq, seq_hits)

                standard_taxa_file.write(a.format_for_standard_taxonomy())
                log_file.write(a.format_for_log())
        finally:
            # Detach and close the handler so that repeated calls to main()
            # do not stack handlers (duplicating every vote line) or leak
            # the voting_log.txt file handle.
            vote_logger.removeHandler(vote_handler)
            vote_handler.close()