Python Taxonomy.keys Examples

Programming Language: Python

Namespace/Package Name: biolib.taxonomy

Class/Type: Taxonomy

Method/Function: keys

Examples at hotexamples.com: 4

Python Taxonomy.keys - 4 examples found. These are the top-rated real-world Python examples of biolib.taxonomy.Taxonomy.keys, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Taxonomy(30)

get(9)

items(9)

read(6)

iteritems(5)

copy(3)

keys(3)

validate(3)

check_full(2)

extant_taxa_for_rank(2)

named_lineages_at_rank(2)

read_from_tree(2)

extract_valid_species_name(1)

fill_missing_ranks(1)

update(1)

Example #1

Show file

File: main.py Project: Python3pkg/GenomeTreeTk

    def diff(self, options):
        """Print the differences between two taxonomy files at a single rank.

        Reads ``options.input_taxonomy1`` and ``options.input_taxonomy2``,
        then walks over every genome ID found in either file.

        Attributes read from ``options``:
            input_taxonomy1, input_taxonomy2: paths of the files to compare.
            rank: rank label; looked up in ``Taxonomy.rank_labels``.
            report_missing_taxa: if true, genome IDs absent from one of the
                two taxonomies are reported.
            report_missing_ranks: if true, differing taxa are reported even
                when one of them is empty after its first three characters.
        """

        check_file_exists(options.input_taxonomy1)
        check_file_exists(options.input_taxonomy2)

        first = Taxonomy().read(options.input_taxonomy1)
        second = Taxonomy().read(options.input_taxonomy2)

        # union of the genome IDs present in either taxonomy
        genome_ids = set(first.keys())
        genome_ids.update(second.keys())

        rank_idx = Taxonomy.rank_labels.index(options.rank)
        for gid in genome_ids:
            in_first = gid in first
            in_second = gid in second

            if options.report_missing_taxa:
                if not in_first:
                    print('Missing in taxonomy 1: %s' % gid)
                elif not in_second:
                    print('Missing in taxonomy 2: %s' % gid)

            # a rank comparison needs the genome in both taxonomies
            if not (in_first and in_second):
                continue

            name1 = first[gid][rank_idx]
            name2 = second[gid][rank_idx]
            if name1 == name2:
                continue

            # [3:] drops the first three characters (presumably the rank
            # prefix such as 'g__' -- confirm), so an empty remainder means
            # the rank is unnamed in that taxonomy
            both_named = name1[3:] and name2[3:]
            if options.report_missing_ranks or both_named:
                print('Different taxon for %s: %s %s' % (gid, name1, name2))

        print('Done.')

Example #2

Show file

    def run(self,
            input_taxonomy,
            genome_path_file,
            metadata_file,
            max_genomes,
            min_comp,
            max_cont,
            min_quality,
            max_contigs,
            min_N50,
            max_ambiguous,
            max_gap_length,
            output_dir):
        """Calculate ANI for named species.

        Genomes are filtered with ``filter_genomes``, the taxonomy is reduced
        to the surviving genomes, and every species with more than one genome
        is queued for a pool of ``self.cpus`` worker processes. A single
        writer process consumes the workers' results.

        Parameters
        ----------
        input_taxonomy : str
            Taxonomy file read with ``Taxonomy().read``.
        genome_path_file : str
            Tab-separated file; column 1 is the genome ID and column 2 the
            genome directory.
        metadata_file : str
            Passed to ``filter_genomes`` and to each worker.
        max_genomes
            Passed unchanged to each worker.
        min_comp, max_cont, min_quality, max_contigs, min_N50,
        max_ambiguous, max_gap_length
            Filtering criteria passed unchanged to ``filter_genomes``.
        output_dir : str
            Passed unchanged to the writer process.
        """

        # get genomes passing filtering criteria
        filtered_genome_ids = filter_genomes(metadata_file,
                                             min_comp,
                                             max_cont,
                                             min_quality,
                                             max_contigs,
                                             min_N50,
                                             max_ambiguous,
                                             max_gap_length)

        # get genomes in each named species, ignoring genomes that failed
        # the filtering criteria
        taxonomy = Taxonomy().read(input_taxonomy)
        genome_ids_to_remove = set(taxonomy.keys()) - filtered_genome_ids
        for genome_id in genome_ids_to_remove:
            del taxonomy[genome_id]

        named_species = Taxonomy().extant_taxa_for_rank('species', taxonomy)

        # get path to nucleotide files; the context manager guarantees the
        # file handle is released once parsing is done
        nt_files = {}
        with open(genome_path_file) as path_file:
            for line in path_file:
                line_split = line.strip().split('\t')

                gtdb_id = line_split[0]
                genome_id = gtdb_id.replace('GB_', '').replace('RS_', '')

                genome_dir = line_split[1]

                nt_file = os.path.join(genome_dir, 'prodigal', genome_id + '_protein.fna')
                nt_files[gtdb_id] = nt_file

        # populate worker queue with data to process
        worker_queue = mp.Queue()
        writer_queue = mp.Queue()

        num_species = 0
        for species, genome_ids in named_species.items():
            if len(genome_ids) > 1:
                worker_queue.put((species, genome_ids))
                num_species += 1

        # one (None, None) entry per worker (presumably the stop signal
        # understood by the worker -- confirm)
        for _ in range(self.cpus):
            worker_queue.put((None, None))

        # bound before the try block so the cleanup handler can always
        # reference them, even if process construction itself fails
        worker_proc = []
        write_proc = None
        try:
            worker_proc = [mp.Process(target=self.__worker, args=(metadata_file,
                                                                  nt_files,
                                                                  max_genomes,
                                                                  worker_queue,
                                                                  writer_queue))
                           for _ in range(self.cpus)]
            write_proc = mp.Process(target=self.__writer, args=(num_species,
                                                                output_dir,
                                                                writer_queue))

            write_proc.start()

            for p in worker_proc:
                p.start()

            for p in worker_proc:
                p.join()

            writer_queue.put((None, None, None, None, None))
            write_proc.join()
        except BaseException:
            # Catches everything a bare ``except:`` would (including
            # KeyboardInterrupt) and, as before, does not re-raise.
            # NOTE(review): failures are therefore invisible to the caller;
            # consider re-raising after cleanup.
            # Only terminate processes that are actually running:
            # terminate() on a process that was never started raises.
            for p in worker_proc:
                if p.is_alive():
                    p.terminate()

            if write_proc is not None and write_proc.is_alive():
                write_proc.terminate()

Example #3

Show file

File: tax_diff.py Project: dparks1134/PhyloRank

   def tax_diff(self, tax1_file, tax2_file, include_user_taxa, output_dir):
       """Tabulate differences between two taxonomies.

       Two tab-separated files are written to the output directory, both
       named after the first taxonomy file (extension removed):
       ``<name>.tax_diff.tsv`` with one row per changed rank of each shared
       genome, and ``<name>.tax_diff_summary.tsv`` with per-rank counts and
       percentages. The per-rank counts are also printed to stdout.

       Parameters
       ----------
       tax1_file : str
           First taxonomy file.
       tax2_file : str
           Second taxonomy file.
       include_user_taxa : boolean
           Flag indicating if User genomes should be considered.
       output_dir : str
           Output directory.
       """

       tax1 = Taxonomy().read(tax1_file)
       tax2 = Taxonomy().read(tax2_file)

       if not include_user_taxa:
           # drop genomes whose ID starts with 'U_'
           tax1 = {genome_id: taxonomy
                   for genome_id, taxonomy in tax1.items()
                   if not genome_id.startswith('U_')}
           tax2 = {genome_id: taxonomy
                   for genome_id, taxonomy in tax2.items()
                   if not genome_id.startswith('U_')}

       common_taxa = set(tax1.keys()).intersection(tax2.keys())

       self.logger.info('First taxonomy contains %d taxa.' % len(tax1))
       self.logger.info('Second taxonomy contains %d taxa.' % len(tax2))
       self.logger.info('Taxonomies have %d taxa in common.' % len(common_taxa))

       # identify differences between taxonomies
       tax_file_name1 = os.path.splitext(os.path.basename(tax1_file))[0]
       tax_file_name2 = os.path.splitext(os.path.basename(tax2_file))[0]
       output_table = os.path.join(output_dir, '%s.tax_diff.tsv' % tax_file_name1)

       unchanged = defaultdict(int)           # T2 = g__Bob -> T1 = g__Bob, or T2 = g__ -> T1 = g__
       active_change = defaultdict(int)       # T2 = g__Bob -> T1 = g__Jane, or T2 = g__Bob -> T1 = g__Bob_A
       passive_change = defaultdict(int)      # T2 = g__??? -> T1 = g__Jane
       unresolved_change = defaultdict(int)   # T2 = g__Box -> T1 = g__???

       # 'with' closes the table even if a lookup or write fails part-way
       with open(output_table, 'w') as fout:
           fout.write('Genome ID\tChange\tRank\t%s\t%s\n' % (tax_file_name1, tax_file_name2))

           for taxa in common_taxa:
               t1 = tax1[taxa]
               t2 = tax2[taxa]

               for rank, (taxon1, taxon2) in enumerate(zip(t1, t2)):
                   if taxon1 == taxon2:
                       unchanged[rank] += 1
                       continue

                   # a taxon equal to the bare rank prefix is unnamed
                   prefix = Taxonomy.rank_prefixes[rank]
                   if taxon1 != prefix and taxon2 != prefix:
                       active_change[rank] += 1
                       change = 'active'
                   elif taxon2 == prefix:
                       passive_change[rank] += 1
                       change = 'passive'
                   else:
                       # only case left: taxon1 is the bare prefix
                       unresolved_change[rank] += 1
                       change = 'unresolved'

                   fout.write('%s\t%s\t%s\t%s\t%s\n' % (taxa, change, Taxonomy.rank_labels[rank], ';'.join(t1), ';'.join(t2)))

       # report results
       output_table = os.path.join(output_dir, '%s.tax_diff_summary.tsv' % tax_file_name1)

       # the summary file was previously never closed; 'with' guarantees the
       # buffered rows are flushed to disk
       with open(output_table, 'w') as fout:
           fout.write('Rank\tUnchanged\tUnchanged (%)\tActive\t Active (%)\tPassive\tPassive (%)\tUnresolved\tUnresolved (%)\n')
           # single-argument print(...) runs on both Python 2 and Python 3
           print('Rank\tUnchanged\tActive\tPassive\tUnresolved\tTotal')
           for rank in range(len(Taxonomy.rank_prefixes)):
               total = unchanged[rank] + active_change[rank] + passive_change[rank] + unresolved_change[rank]
               if total == 0:
                   # rank never compared; also avoids dividing by zero
                   continue

               fout.write('%s\t%d\t%.1f\t%d\t%.1f\t%d\t%.1f\t%d\t%.1f\n' %
                          (Taxonomy.rank_labels[rank],
                           unchanged[rank], unchanged[rank] * 100.0 / total,
                           active_change[rank], active_change[rank] * 100.0 / total,
                           passive_change[rank], passive_change[rank] * 100.0 / total,
                           unresolved_change[rank], unresolved_change[rank] * 100.0 / total))
               print('%s\t%d\t%d\t%d\t%d\t%d' % (Taxonomy.rank_labels[rank],
                                                 unchanged[rank],
                                                 active_change[rank],
                                                 passive_change[rank],
                                                 unresolved_change[rank],
                                                 total))

Example #4

Show file

   def tax_diff(self, tax1_file, tax2_file, include_user_taxa, output_dir):
       """Tabulate differences between two taxonomies.

       For every genome present in both taxonomies, each rank is classified
       as unchanged, active, passive or unresolved. Changed ranks are written
       to ``<name>.tax_diff.tsv`` and per-rank totals to
       ``<name>.tax_diff_summary.tsv``, where ``<name>`` is the first
       taxonomy file's base name without its extension. The per-rank totals
       are also printed to stdout.

       Parameters
       ----------
       tax1_file : str
           First taxonomy file.
       tax2_file : str
           Second taxonomy file.
       include_user_taxa : boolean
           Flag indicating if User genomes should be considered.
       output_dir : str
           Output directory.
       """

       def without_user_genomes(taxonomy):
           """Return a plain dict lacking genomes whose ID starts with 'U_'."""
           return {genome_id: taxa
                   for genome_id, taxa in taxonomy.items()
                   if not genome_id.startswith('U_')}

       tax1 = Taxonomy().read(tax1_file)
       tax2 = Taxonomy().read(tax2_file)

       if not include_user_taxa:
           tax1 = without_user_genomes(tax1)
           tax2 = without_user_genomes(tax2)

       # iterating a mapping yields its keys, so no keys()/list() copies
       common_taxa = set(tax1).intersection(tax2)

       self.logger.info('First taxonomy contains %d taxa.' % len(tax1))
       self.logger.info('Second taxonomy contains %d taxa.' % len(tax2))
       self.logger.info('Taxonomies have %d taxa in common.' % len(common_taxa))

       # identify differences between taxonomies
       tax_file_name1 = os.path.splitext(os.path.basename(tax1_file))[0]
       tax_file_name2 = os.path.splitext(os.path.basename(tax2_file))[0]
       output_table = os.path.join(output_dir, '%s.tax_diff.tsv' % tax_file_name1)

       unchanged = defaultdict(int)           # T2 = g__Bob -> T1 = g__Bob, or T2 = g__ -> T1 = g__
       active_change = defaultdict(int)       # T2 = g__Bob -> T1 = g__Jane, or T2 = g__Bob -> T1 = g__Bob_A
       passive_change = defaultdict(int)      # T2 = g__??? -> T1 = g__Jane
       unresolved_change = defaultdict(int)   # T2 = g__Box -> T1 = g__???

       # label written to the table -> counter tracking that kind of change
       change_counts = {'active': active_change,
                        'passive': passive_change,
                        'unresolved': unresolved_change}

       # the context manager closes the table even when a write fails
       with open(output_table, 'w') as fout:
           fout.write('Genome ID\tChange\tRank\t%s\t%s\n' % (tax_file_name1, tax_file_name2))

           for taxa in common_taxa:
               t1 = tax1[taxa]
               t2 = tax2[taxa]

               for rank, (taxon1, taxon2) in enumerate(zip(t1, t2)):
                   if taxon1 == taxon2:
                       unchanged[rank] += 1
                       continue

                   # a taxon equal to the bare rank prefix is unnamed
                   prefix = Taxonomy.rank_prefixes[rank]
                   if taxon1 != prefix and taxon2 != prefix:
                       change = 'active'
                   elif taxon2 == prefix:
                       change = 'passive'
                   else:
                       # taxon1 must be the bare prefix here
                       change = 'unresolved'

                   change_counts[change][rank] += 1
                   fout.write('%s\t%s\t%s\t%s\t%s\n' % (taxa, change, Taxonomy.rank_labels[rank], ';'.join(t1), ';'.join(t2)))

       # report results
       output_table = os.path.join(output_dir, '%s.tax_diff_summary.tsv' % tax_file_name1)

       # this handle used to be left open; closing it guarantees the summary
       # rows are flushed to disk
       with open(output_table, 'w') as fout:
           fout.write('Rank\tUnchanged\tUnchanged (%)\tActive\t Active (%)\tPassive\tPassive (%)\tUnresolved\tUnresolved (%)\n')
           print('Rank\tUnchanged\tActive\tPassive\tUnresolved\tTotal')
           for rank in range(len(Taxonomy.rank_prefixes)):
               counts = (unchanged[rank],
                         active_change[rank],
                         passive_change[rank],
                         unresolved_change[rank])
               total = sum(counts)
               if total == 0:
                   # nothing compared at this rank; skip to avoid dividing by zero
                   continue

               label = Taxonomy.rank_labels[rank]

               # each count is followed by its percentage of the rank total
               row = [label]
               for count in counts:
                   row.extend((count, count * 100.0 / total))

               fout.write('%s\t%d\t%.1f\t%d\t%.1f\t%d\t%.1f\t%d\t%.1f\n' % tuple(row))
               print('%s\t%d\t%d\t%d\t%d\t%d' % ((label,) + counts + (total,)))