def update_read_dict(read2contig, blast_top_m8, read_dict, accession_dict):
    '''
    Fold contig-level top hits from blast_top_m8 into the per-read data.

    Arguments:
      read2contig    -- mapping of read id -> contig id.
      blast_top_m8   -- m8 input handed to m8.iterate_m8; the first field of
                        each row is treated as a contig id.
      read_dict      -- mapping of read id -> row (list). It is modified IN
                        PLACE and returned as the first element of the result.
      accession_dict -- mapping of accession id ->
                        (species_taxid, genus_taxid, family_taxid).

    Returns a 4-tuple:
      (read_dict, read2blastm8, contig2lineage, added_reads)
      read2blastm8   -- read id -> m8 line of the hit of the read's contig.
      contig2lineage -- contig id -> accession_dict entry of its hit.
      added_reads    -- read id -> new row for reads that have no (or an
                        empty) row in read_dict; tagged 'from_assembly'.

    Raises KeyError when a hit's accession id is missing from accession_dict.
    '''
    top_hit_by_contig = {}
    contig2lineage = {}
    for (contig_id, accession_id, _percent_id, _alignment_length, _e_value,
         _bitscore, line) in m8.iterate_m8(blast_top_m8):
        # A later row for the same contig overwrites an earlier one.
        top_hit_by_contig[contig_id] = (accession_id, line)
        contig2lineage[contig_id] = accession_dict[accession_id]

    read2blastm8 = {}
    added_reads = {}
    no_hit = (None, None)
    for read_id, contig_id in read2contig.items():
        accession, m8_line = top_hit_by_contig.get(contig_id, no_hit)
        if accession:
            species_taxid, genus_taxid, family_taxid = accession_dict[accession]
            contig_fields = [
                contig_id, accession, species_taxid, genus_taxid, family_taxid
            ]
            if read_dict.get(read_id):
                # Known read: append the contig-derived columns and override
                # the column at index 2 with the contig's species taxid.
                read_dict[read_id] += contig_fields
                read_dict[read_id][2] = species_taxid
            else:
                # Read without an existing row: synthesize a full row.
                added_reads[read_id] = [
                    read_id, "1", species_taxid, accession,
                    species_taxid, genus_taxid, family_taxid
                ] + contig_fields + ['from_assembly']
        if m8_line:
            read2blastm8[read_id] = m8_line

    return (read_dict, read2blastm8, contig2lineage, added_reads)
def get_top_m8(orig_m8, top_entry_m8):
    '''
    Get top m8 file entry for each read from orig_m8 and output to top_entry_m8.

    Arguments:
      orig_m8      -- m8 input handed to m8.iterate_m8.
      top_entry_m8 -- path of the output file; it is opened in 'w' mode, so
                      any existing content is replaced.

    Rows are grouped by consecutive runs of the same read id: only the
    previous row's read id is compared, so rows of one read must be
    contiguous in orig_m8 to be treated as a single batch. Within a batch
    the row with the highest bitscore is kept; on ties the earliest row wins.

    An input without rows produces an empty output file.
    '''
    with open(top_entry_m8, 'w') as top_m8f:
        top_line = None
        top_bitscore = 0
        current_read_id = None
        for (read_id, _accession_id, _percent_id, _alignment_length, _e_value,
             bitscore, line) in m8.iterate_m8(orig_m8):
            # Get the top entry of each read_id based on the bitscore
            if read_id != current_read_id:
                # Different batch start. Flush the finished batch, if any.
                # Testing top_line (rather than the truthiness of the read
                # id) keeps batches whose read id is falsy, e.g. "".
                if top_line is not None:
                    top_m8f.write(top_line)
                current_read_id = read_id
                top_line = line
                top_bitscore = bitscore
            elif bitscore > top_bitscore:
                top_bitscore = bitscore
                top_line = line
        # Flush the last batch; nothing to write when there were no rows.
        # NOTE(review): lines are written as-is, presumably they already
        # carry their line terminator -- confirm against m8.iterate_m8.
        if top_line is not None:
            top_m8f.write(top_line)
def get_map(m8_file):
    '''
    Return a dict mapping read id -> accession id for the rows of m8_file.

    When a read id occurs in several rows, the last row wins.

    Only the two leading fields of each row are used and they are read by
    position. The other m8.iterate_m8 call sites unpack seven fields per row
    (including a bitscore), whereas unpacking six names here would raise
    ValueError on such rows; positional access works for either row width.
    NOTE(review): assumes the rows are indexable sequences (tuples/lists).
    '''
    # NOTE(review): the second argument is passed through unchanged; its
    # meaning is defined by m8.iterate_m8, which is not visible here.
    return {
        row[0]: row[1]
        for row in m8.iterate_m8(m8_file, "annotate_fasta_with_accessions")
    }