Example #1
0
    def update_read_dict(read2contig, blast_top_m8, read_dict, accession_dict):
        ''' Fold the contig-level hits from blast_top_m8 into the per-read records.

        Args:
            read2contig: mapping of read_id -> contig_id.
            blast_top_m8: m8 file passed to m8.iterate_m8; the first field of
                every entry is used as a contig id. When a contig appears more
                than once, the last entry wins.
            read_dict: mapping of read_id -> record list. It is NOT copied: it
                is updated in place and handed back as the first return value.
            accession_dict: mapping of accession_id -> (species_taxid,
                genus_taxid, family_taxid). Every accession in blast_top_m8
                must be present, otherwise a KeyError is raised.

        Returns:
            A tuple (consolidated_dict, read2blastm8, contig2lineage, added_reads):
              * consolidated_dict: read_dict itself, after the update.
              * read2blastm8: read_id -> m8 line of the hit of the read's contig.
              * contig2lineage: contig_id -> accession_dict entry of its hit.
              * added_reads: read_id -> new record for reads that had no
                (non-empty) record in read_dict, tagged 'from_assembly'.
        '''
        # Index the hit (accession + raw m8 line) and the lineage of every contig.
        contig_hits = {}
        contig2lineage = {}
        for row in m8.iterate_m8(blast_top_m8):
            contig_id, accession_id, _pid, _aln_len, _e_value, _bitscore, line = row
            contig_hits[contig_id] = (accession_id, line)
            contig2lineage[contig_id] = accession_dict[accession_id]

        consolidated_dict = read_dict
        read2blastm8 = {}
        added_reads = {}
        no_hit = (None, None)

        for read_id, contig_id in read2contig.items():
            accession, m8_line = contig_hits.get(contig_id, no_hit)

            if accession:
                species_taxid, genus_taxid, family_taxid = accession_dict[accession]
                assembly_fields = [
                    contig_id, accession, species_taxid, genus_taxid,
                    family_taxid
                ]
                if consolidated_dict.get(read_id):
                    # Known read: append the assembly fields and let the
                    # contig's species replace the value stored at index 2.
                    consolidated_dict[read_id] += assembly_fields
                    consolidated_dict[read_id][2] = species_taxid
                else:
                    # Read without a usable record: build one from the contig hit.
                    read_fields = [
                        read_id, "1", species_taxid, accession, species_taxid,
                        genus_taxid, family_taxid
                    ]
                    added_reads[read_id] = (
                        read_fields + assembly_fields + ['from_assembly'])

            if m8_line:
                read2blastm8[read_id] = m8_line

        return (consolidated_dict, read2blastm8, contig2lineage, added_reads)
Example #2
0
 def get_top_m8(orig_m8, top_entry_m8):
     ''' Get top m8 file entry for each read from orig_m8 and output to top_entry_m8.

     The entries of one read are expected on consecutive lines: a change of
     read_id closes the current batch. Within a batch the entry with the
     highest bitscore wins; on a tie the earliest entry is kept. When orig_m8
     yields no entries, top_entry_m8 is created empty.
     '''
     with open(top_entry_m8, 'w') as top_m8f:
         top_line = None
         top_bitscore = 0
         current_read_id = None
         for read_id, _accession_id, _percent_id, _alignment_length, _e_value, bitscore, line in m8.iterate_m8(
                 orig_m8):
             # Get the top entry of each read_id based on the bitscore
             if read_id != current_read_id:
                 # Different batch start: flush the winner of the previous
                 # batch. top_line is None only before the very first entry.
                 if top_line is not None:
                     top_m8f.write(top_line)
                 current_read_id = read_id
                 top_line = line
                 top_bitscore = bitscore
             elif bitscore > top_bitscore:
                 top_bitscore = bitscore
                 top_line = line
         # Flush the last batch. Guarded because write(None) raises TypeError
         # when the input had no entries at all.
         if top_line is not None:
             top_m8f.write(top_line)
 def get_map(m8_file):
     ''' Return a dict mapping read_id -> accession_id for the entries of m8_file.

     Only the first two fields of each entry are used. When a read_id occurs
     more than once, the last occurrence wins.
     '''
     # The trailing fields are swallowed with star-unpacking so this works no
     # matter how many extra columns m8.iterate_m8 yields (other callers in
     # this file unpack seven fields, including the bitscore).
     return {
         read_id: accession_id
         for read_id, accession_id, *_unused in m8.iterate_m8(
             m8_file, "annotate_fasta_with_accessions")
     }