# Common Biopython imports used throughout these snippets
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


def test_filter_by_size() -> None:
    """Tests the function filter_by_size. Run with pytest."""
    # SeqRecord requires a Seq (or MutableSeq) object, not a plain string
    valid_seq = SeqRecord(seq=Seq("ACTGCTG"), id="valid")
    invalid_seq = SeqRecord(seq=Seq("ACTG"), id="invalid")
    min_size = 5
    assert filter_by_size(valid_seq, min_size) is True
    assert filter_by_size(invalid_seq, min_size) is False
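# A minimal sketch of the filter_by_size function exercised above
# (hypothetical -- the real implementation is not shown in these snippets):
# keep a record only if its sequence reaches the minimum length.
def filter_by_size(record: SeqRecord, min_size: int) -> bool:
    """Return True if the record's sequence is at least min_size long."""
    return len(record.seq) >= min_size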
def write_fasta(filename, data):
    """Write the id -> sequence mapping in `data` to `filename` as FASTA."""
    seq_list = []
    for i in data.keys():
        seq_list.append(SeqRecord(Seq(data.get(i)), id=i, description=""))
    with open(filename, "w") as fd:
        SeqIO.write(seq_list, fd, "fasta")
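# Usage sketch for write_fasta: `data` is assumed to map record ids to plain
# sequence strings, so an ordinary dict works (hypothetical example data):
write_fasta("example.fasta", {"seq1": "ACTGCTG", "seq2": "GGTTAAC"})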
def unite_aligns(dirname, outfile, full_length, ignore_gaps=False, ignore_level=0.9):
    """
    Unite the consensuses of the alignments in a directory into one FASTA file.

    :param dirname: directory containing the alignments
    :param outfile: output filename
    :param full_length: boolean. If True, the confidence level is computed
        over the full length of the sequences; if False, the calculation
        skips the all-gap ends of the sequences
    :param ignore_gaps: boolean. Ignore gaps with a high confidence level
        (confidence >= ignore_level)
    :param ignore_level: float, confidence level above which gaps are ignored
    """
    filenames = Extractor.extract_filenames(dirname)
    records = []
    for file in filenames:
        align = AlignController.__get_alignment_from(file)
        consensus = align.get_consensus(full_length=full_length)
        str_cons = consensus.get_str_consensus(ignore_gaps=ignore_gaps,
                                               ignore_level=ignore_level)
        name = f"{basename(file).split('.')[0]}:"
        description = (f"consensus sequence with parameters: "
                       f"full_length={full_length}, ignore_gaps={ignore_gaps}, "
                       f"ignore_level={ignore_level}")
        records.append(SeqRecord(Seq(str_cons), id=name, name=name,
                                 description=description))
    RecordsWriter(records).write_to(outfile)
def write_fastq_handle(handle, data):
    seq_list = []
    for i in data.keys():
        seq_list.append(
            SeqRecord(Seq(data.get(i)), id=i, description="",
                      letter_annotations={'solexa_quality': data.getqual(i)}))
    SeqIO.write(seq_list, handle, "fastq")
def __getitem__(self, k):
    if k not in self.d:
        # Python 3 raise syntax; KeyError is the idiomatic error for __getitem__
        raise KeyError("key {0} not in dictionary!".format(k))
    self.f.seek(self.d[k])
    content = ''
    for line in self.f:
        if line.startswith('>'):
            break
        content += line.strip()
    return SeqRecord(seq=Seq(content), id=k)
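# A sketch of how the offset dictionary self.d consumed above could be built
# (an assumption -- the indexing code is not part of this snippet): for each
# '>' header, store the file position of its first sequence line.
def build_fasta_index(f):
    """Map FASTA record ids to the offset of their first sequence line."""
    d = {}
    line = f.readline()
    while line:
        if line.startswith('>'):
            key = line[1:].strip().split()[0]
            d[key] = f.tell()  # offset of the line right after the header
        line = f.readline()
    return d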
def __getitem__(self, k):
    if k not in self.d:
        raise KeyError("key {0} not in dictionary!".format(k))
    self.f.seek(self.d[k])
    sequence = self.f.readline().strip()
    assert self.f.readline().startswith('+')
    qualstr = self.f.readline().strip()
    # FASTQ quality strings are Phred+33 encoded: ASCII code minus 33
    return SeqRecord(seq=Seq(sequence), id=k,
                     letter_annotations={'phred_quality': [ord(x) - 33 for x in qualstr]})
def write_fastq(filename, data):
    seq_list = []
    for i in data.keys():
        seq_list.append(
            SeqRecord(Seq(data.get(i)), id=i, description="",
                      letter_annotations={'solexa_quality': data.getqual(i)}))
    with open(filename, "w") as fd:
        SeqIO.write(seq_list, fd, "fastq")
def select_positive_and_negative_reads_with_bowtie(reads, vntr_finder, label):
    # note: here Seq is the Bio.Seq module (from Bio import Seq),
    # so Seq.Seq(read) builds a sequence object
    working_dir = bowtie_working_dir + '/%s/' % vntr_finder.reference_vntr.id
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    fq_file = working_dir + label + '.fa'
    records = []
    for i, read in enumerate(reads):
        # build the record with a Seq directly; SeqRecord('') would raise a
        # TypeError since the seq argument must be a Seq object
        record = SeqRecord(Seq.Seq(read), id='read_%s/1' % str(i))
        records.append(record)
    with open(fq_file, 'w') as output_handle:
        SeqIO.write(records, output_handle, 'fasta')
    passed_time = time()
    bowtie_bamfile = align_with_bowtie(fq_file)
    bowtie_selected = len(
        get_id_of_reads_mapped_to_vntr_in_bamfile(bowtie_bamfile,
                                                  vntr_finder.reference_vntr))
    return float(bowtie_selected), float(len(reads) - bowtie_selected), time() - passed_time
def test_seq_with_xgroups() -> None:
    """Test if the module masked_seqs_stats can successfully find the groups
    of masked vector subsequences (xgroups) in a sequence that contains one
    of them.
    """
    seq = SeqRecord(
        seq=Seq(
            "AAGCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXCGA"
            "GACGGCCGCCCGGGCAGGTACACCCAAGGATTTAATCGTCAAACCATGACGGGTCTCGAAAATCGAAA"
            "CGGACAACATGACAAGGAAATGGGCCCGATGATGAACGAAGTCACCAGACCGAGATACATCAGGGACG"
            "ATAAGAATGCCAAAATTATCGACACATCGGTGGAAAC"),
        id="DT319107.1",
        name="DT319107.1",
    )
    seq_class, seq_xgroups = masked_seqs_stats.find_x_regions_and_calculate_stats(seq)

    # Check the amount of xgroups
    assert len(seq_xgroups) == 1

    # Check the length of the xgroup
    pattern = re.compile(r"X+")
    substring = pattern.search(str(seq.seq))
    assert len(substring.group(0)) == seq_xgroups[0].xgroup_len

    # Check the xgroup distance to 5'
    first = seq.seq.find("X")
    assert first == seq_xgroups[0].dist_from_5

    # Check the xgroup distance to 3'
    last = seq.seq.rfind("X")
    dist = len(seq.seq) - last - 1
    assert dist == seq_xgroups[0].dist_from_3

    # Check the seq class
    assert seq_class == 3
def test_seq_without_xgroups() -> None:
    """Test if the module masked_seqs_stats can successfully take a sequence
    that has no masked vector subsequences and produce the expected empty
    output.
    """
    seq = SeqRecord(
        seq=Seq("ACCTATAGGTTGTCGTCGACAAAGAAATGAATCAACTTCCTCTGGTGGTT"
                "CATGGCAAATGATATCTGGAACTGGTAGTTTACGTGGTTCAACAACAGCC"
                "CACACATCTATTACAGAGGGATCTAATTCTTCTGGCTCGACTAGCAAAGG"
                "TTTATTTGAAAATTTTTTACATCAAGCTCATGGATCTAGTAAAGCAATAT"
                "TGGAAGATGACGAATCCGTATCACAAGTACCTGCCCGGGCGGCCGCTCGA"
                "AAGCCG"),
        id="DT319104.1",
        name="DT319104.1",
    )
    seq_class, seq_xgroups = masked_seqs_stats.find_x_regions_and_calculate_stats(seq)

    # Check that it has no xgroups
    assert len(seq_xgroups) == 0

    # Check that the seq class is invalid (i.e., 0)
    assert seq_class == 0
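# An illustrative sketch of the behaviour the two tests above exercise
# (hypothetical -- masked_seqs_stats may compute more than this): every run
# of X characters yields its length and its distances to the 5' and 3' ends.
import re

def find_xgroups(sequence: str):
    """Return (xgroup_len, dist_from_5, dist_from_3) for each X-run."""
    groups = []
    for match in re.finditer(r"X+", sequence):
        xgroup_len = match.end() - match.start()
        dist_from_5 = match.start()                 # bases before the run
        dist_from_3 = len(sequence) - match.end()   # bases after the run
        groups.append((xgroup_len, dist_from_5, dist_from_3))
    return groups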
def write_fasta_handle(handle, data):
    seq_list = []
    for i in data.keys():
        seq_list.append(SeqRecord(Seq(data.get(i)), id=i, description=""))
    SeqIO.write(seq_list, handle, "fasta")
import os
import subprocess

from Bio.Align import MultipleSeqAlignment


def impute_ancestors_dnapars(seqs, gl_seq, scratch_dir, gl_name='germline', verbose=True):
    """
    Compute ancestral states via maximum parsimony

    @param seqs: list of sequences
    @param gl_seq: germline sequence
    @param scratch_dir: where to write intermediate dnapars files
    @param gl_name: name of germline (must be less than 10 characters long)

    @return genes_line: information needed to output imputed germline data
    @return seqs_line: information needed to output imputed sequence data
    """
    from gctree.bin.phylip_parse import parse_outfile

    assert len(gl_name) < 10

    infile, config, outfile = [
        os.path.join(scratch_dir, fname)
        for fname in ['infile', 'dnapars.cfg', 'outfile']
    ]

    aln = MultipleSeqAlignment([SeqRecord(Seq(gl_seq), id=gl_name)])

    # sequence ID must be less than ten characters, but also dnapars sets internal node
    # names to 1, 2, 3, ..., so name them numbers descending from 100 million, hoping
    # we won't ever have a clone that big...
    for idx, seq in enumerate(seqs):
        aln.append(SeqRecord(Seq(seq), id=str(99999999 - idx)))

    # dnapars uses the name "infile" as default input phylip file
    with open(infile, 'w') as phylip_file:
        phylip_file.write(aln.format('phylip'))

    # and we need to tell it the line where the root sequence occurs
    with open(infile, 'r') as phylip_file:
        for lineno, line in enumerate(phylip_file):
            if line.startswith(gl_name):
                naive_idx = str(lineno)

    # arcane user options for dnapars
    # 'O', naive_idx: the location of the outgroup root
    # 'S', 'Y': less thorough search; runs much faster but output is less exhaustive
    # 'J', 13, 10: randomize input ("jumble") using seed 13 and jumbling 10 times
    # 4: print out steps in each site (to get all nucleotide info)
    # 5: print sequences at all nodes (to get ancestors)
    # '.': use dot-differencing for display
    # 'Y': accept these options
    with open(config, 'w') as cfg_file:
        cfg_file.write('\n'.join(
            ['O', naive_idx, 'S', 'Y', 'J', '13', '10', '4', '5', '.', 'Y']))

    # defer to command line to construct parsimony trees and ancestral states
    # dnapars has weird behavior if outfile and outtree already exist o_O
    cmd = [
        'cd', scratch_dir, '&& rm -f outfile outtree && dnapars <',
        os.path.basename(config), '> dnapars.log'
    ]
    if verbose:
        print("Calling:", " ".join(cmd))
    res = subprocess.call([" ".join(cmd)], shell=True)

    # phew, finally got some trees
    trees = parse_outfile(outfile, countfile=None, naive=gl_name)

    # take first parsimony tree
    genes_line = []
    seq_line = []
    for idx, descendant in enumerate(trees[0].traverse('preorder')):
        if descendant.is_root():
            descendant.name = gl_name
        else:
            # use dummy name for internal node sequences
            descendant.name = '-'.join([descendant.up.name, descendant.name])
            if [descendant.up.name, descendant.up.sequence.lower()] not in genes_line:
                genes_line.append(
                    [descendant.up.name, descendant.up.sequence.lower()])
            seq_line.append([
                descendant.up.name, descendant.name,
                descendant.sequence.lower()
            ])
    return genes_line, seq_line
import Bio.PDB
from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist

# Get protein sequence
ppb = Bio.PDB.PPBuilder()
polypeptides = ppb.build_peptides(structure)
seq1 = polypeptides[0].get_sequence()
seq2 = polypeptides[1].get_sequence()

# Align the two chains globally with BLOSUM62 and affine gap penalties
matrix = matlist.blosum62
gap_open = -10
gap_extend = -0.5
alns = pairwise2.align.globalds(seq1, seq2, matrix, gap_open, gap_extend)
top_aln = alns[0]

alignment = MultipleSeqAlignment(
    [SeqRecord(Seq(top_aln[0])), SeqRecord(Seq(top_aln[1]))])
structure_alignment = Bio.PDB.StructureAlignment(alignment,
                                                 structure[0]['A'],
                                                 structure[0]['B'])

# Collect the CA atoms of every aligned residue pair for superposition
sup = Bio.PDB.Superimposer()
ref_atoms = []
mov_atoms = []
for duo in structure_alignment.duos:
    res1 = duo[0]
    res2 = duo[1]
    if res1 and res2:
        ref_atoms.append(res1['CA'])
        mov_atoms.append(res2['CA'])
sup.set_atoms(ref_atoms, mov_atoms)
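# After set_atoms, the rotation/translation that minimizes the RMSD can be
# applied to the moving chain and the fit inspected -- standard
# Bio.PDB.Superimposer usage, continuing the snippet above:
sup.apply(list(structure[0]['B'].get_atoms()))
print("RMSD over aligned CA pairs: %.3f" % sup.rms)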
def get_hmm_accuracy(vntr_finder, simulated_true_reads, simulated_false_filtered_reads):
    output_dir = result_dir + '/%s/' % vntr_finder.reference_vntr.id

    print('running BLAST')
    from blast_wrapper import get_blast_matched_ids, make_blast_database
    blast_dir = output_dir + 'blast_dir/'
    if not os.path.exists(blast_dir):
        os.makedirs(blast_dir)
    vntr_id = vntr_finder.reference_vntr.id
    fasta_file = blast_dir + 'reads.fasta'
    records = []
    # note: here Seq is the Bio.Seq module; also, the read ids must start with
    # 'false'/'true' since the startswith checks below rely on these prefixes
    for i, read in enumerate(simulated_false_filtered_reads):
        records.append(SeqRecord(seq=Seq.Seq(read), id='false_%s' % i))
    for i, read in enumerate(simulated_true_reads):
        records.append(SeqRecord(seq=Seq.Seq(read), id='true_%s' % i))
    with open(fasta_file, 'w') as output_handle:
        SeqIO.write(records, output_handle, 'fasta')
    make_blast_database(fasta_file, blast_dir + 'blast_db_%s' % vntr_id)
    query = '@'.join(get_blast_keywords(vntr_finder.reference_vntr))
    search_id = 'search_id'
    search_results = get_blast_matched_ids(query, blast_dir + 'blast_db_%s' % vntr_id,
                                           max_seq='100000', word_size='7',
                                           evalue=sys.maxsize, search_id=search_id,
                                           identity_cutoff='100', blast_tmp_dir=blast_dir)

    from collections import Counter
    res = Counter(search_results)
    filtered = [item for item, occur in res.items() if occur >= 2]
    print('BLAST results computed')
    print(len(filtered))
    print(len(simulated_true_reads))
    print(len(simulated_false_filtered_reads))
    tp = float(len([e for e in filtered if e.startswith('true')]))
    fp = float(len([e for e in filtered if e.startswith('false')]))
    fn = float(len(simulated_true_reads) - tp)
    tn = float(len(simulated_false_filtered_reads) - fp)
    train_time = 0
    passed_time = 0
    precision = tp / (tp + fp) if tp > 0 else 0
    recall = tp / (tp + fn)
    accuracy = 100 * (tp + tn) / (fp + fn + tp + tn)
    print('BLAST:')
    print(tp, fp, fn, tn)
    print('Precision:', precision)
    print('Recall:', recall)
    print('acc: %s' % accuracy)
    with open(output_dir + '/blast.txt', 'w') as outfile:
        outfile.write('%s\n' % train_time)
        outfile.write('%s\n' % passed_time)
        outfile.write('%s\n' % precision)
        outfile.write('%s\n' % recall)
        outfile.write('%s\n' % accuracy)
        outfile.write('%s,%s,%s,%s\n' % (tp, fn, fp, tn))
    # NOTE: this early return makes the HMM benchmark below unreachable as
    # written; remove it to run the HMM half of the comparison
    return passed_time

    output_dir = result_dir + '/%s/' % vntr_finder.reference_vntr.id
    if os.path.exists(output_dir + '/hmm.txt') and os.path.getsize(output_dir + '/hmm.txt') > 0:
        if sum(1 for _ in open(output_dir + 'hmm.txt')) > 5:
            print('HMM info is already calculated')
            with open(output_dir + 'hmm.txt') as infile:
                lines = infile.readlines()
                return float(lines[1])

    # split reads into train/test halves by alternating indices
    train_true_reads = [read for i, read in enumerate(simulated_true_reads) if i % 2 == 0]
    train_false_reads = [read for i, read in enumerate(simulated_false_filtered_reads) if i % 2 == 0]
    test_true_reads = [read for i, read in enumerate(simulated_true_reads) if i % 2 == 1]
    test_false_reads = [read for i, read in enumerate(simulated_false_filtered_reads) if i % 2 == 1]

    start_time = time()
    hmm = vntr_finder.get_vntr_matcher_hmm(read_length=read_length)
    processed_true_reads = vntr_finder.find_hmm_score_of_simulated_reads(hmm, train_true_reads)
    processed_false_reads = vntr_finder.find_hmm_score_of_simulated_reads(hmm, train_false_reads)
    recruitment_score = vntr_finder.find_recruitment_score_threshold(processed_true_reads,
                                                                     processed_false_reads)
    train_time = time() - start_time
    print('HMM train time: %s' % train_time)

    tp = 0.0
    fn = 0.0
    tn = 0.0
    fp = 0.0
    start_time = time()
    true_reads = vntr_finder.find_hmm_score_of_simulated_reads(hmm, test_true_reads)
    false_reads = vntr_finder.find_hmm_score_of_simulated_reads(hmm, test_false_reads)
    passed_time = time() - start_time
    for read in true_reads:
        if read.logp > recruitment_score:
            tp += 1
        else:
            fn += 1
    for read in false_reads:
        if read.logp > recruitment_score:
            fp += 1
        else:
            tn += 1
    precision = tp / (tp + fp) if tp > 0 else 0
    recall = tp / (tp + fn)
    accuracy = 100 * (tp + tn) / (fp + fn + tp + tn)
    print('HMM: %s' % passed_time)
    print(tp, fp, fn, tn)
    print('Precision:', precision)
    print('Recall:', recall)
    print('acc: %s' % accuracy)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(output_dir + '/hmm.txt', 'w') as outfile:
        outfile.write('%s\n' % train_time)
        outfile.write('%s\n' % passed_time)
        outfile.write('%s\n' % precision)
        outfile.write('%s\n' % recall)
        outfile.write('%s\n' % accuracy)
        outfile.write('%s,%s,%s,%s\n' % (tp, fn, fp, tn))
    return passed_time
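# Both branches above repeat the same precision/recall/accuracy arithmetic.
# A small helper (a refactoring sketch, not part of the original code) would
# keep the formulas in one place and guard the divisions:
def classification_metrics(tp, fp, fn, tn):
    """Return (precision, recall, accuracy-in-percent) from a confusion matrix."""
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    accuracy = 100 * (tp + tn) / (tp + fp + fn + tn)
    return precision, recall, accuracy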
def pick_rep(
    fa_fq_filename,
    sam_filename,
    gff_filename,
    group_filename,
    output_filename,
    fusion_candidate_ranges,
    is_fq=False,
):
    """
    For each group, select the representative record

    Always pick the longest one!
    """
    if is_fq:
        fd = LazyFastqReader(fa_fq_filename)
    else:
        fd = LazyFastaReader(fa_fq_filename)
    fout = open(output_filename, "w")

    rep_info = {}
    id_to_rep = {}
    for line in open(group_filename):
        pb_id, members = line.strip().split("\t")
        logger.info(f"Picking representative sequence for {pb_id}")
        best_id = None
        best_seq = None
        best_qual = None
        max_len = 0
        for x in members.split(","):
            if len(fd[x].seq) >= max_len:
                best_id = x
                best_seq = fd[x].seq
                best_qual = fd[x].letter_annotations["phred_quality"] if is_fq else None
                max_len = len(fd[x].seq)
        rep_info[pb_id] = (best_id, best_seq, best_qual)
        id_to_rep[best_id] = pb_id

    f_gff = open(gff_filename, "w")
    coords = {}
    # temporary storage for the .1 record to write in conjunction with the second record
    record_storage = {}
    for r in BioReaders.GMAPSAMReader(sam_filename, True):
        if r.qID in id_to_rep:
            pb_id = id_to_rep[r.qID]
            # make coordinates & write the SAM file
            isoform_index = get_isoform_index(fusion_candidate_ranges[r.qID],
                                              r.sID, r.sStart, r.sEnd)
            if r.qID not in coords:
                coords[r.qID] = [None] * len(fusion_candidate_ranges[r.qID])
                record_storage[pb_id] = [None] * len(fusion_candidate_ranges[r.qID])
            coords[r.qID][isoform_index] = f"{r.sID}:{r.sStart}-{r.sEnd}({r.flag.strand})"
            record_storage[pb_id][isoform_index] = r

    for pb_id, records in record_storage.items():
        for i, r in enumerate(records):
            isoform_index = i + 1
            f_gff.write(
                f"{r.sID}\tPacBio\ttranscript\t{r.segments[0].start + 1}\t"
                f"{r.segments[-1].end}\t.\t{r.flag.strand}\t.\t"
                f'gene_id "{pb_id}"; '
                f'transcript_id "{pb_id}.{isoform_index}";\n')
            for s in r.segments:
                f_gff.write(f"{r.sID}\tPacBio\texon\t"
                            f"{s.start + 1}\t{s.end}\t.\t{r.flag.strand}\t.\t"
                            f'gene_id "{pb_id}"; '
                            f'transcript_id "{pb_id}.{isoform_index}";\n')
    f_gff.close()

    for pb_id in rep_info:
        best_id, best_seq, best_qual = rep_info[pb_id]
        _id_ = f"{pb_id}|{'+'.join(coords[best_id])}|{best_id}"
        _seq_ = best_seq
        if is_fq:
            SeqIO.write(
                SeqRecord(_seq_, id=_id_,
                          letter_annotations={"phred_quality": best_qual}),
                fout, "fastq")
        else:
            SeqIO.write(SeqRecord(_seq_, id=_id_), fout, "fasta")
    fout.close()
def pick_rep(fa_fq_filename, sam_filename, gff_filename, group_filename,
             output_filename, fusion_candidate_ranges, is_fq=False):
    """
    For each group, select the representative record

    Always pick the longest one!
    """
    if is_fq:
        fd = LazyFastqReader(fa_fq_filename)
    else:
        fd = LazyFastaReader(fa_fq_filename)
    fout = open(output_filename, 'w')

    rep_info = {}
    id_to_rep = {}
    for line in open(group_filename):
        pb_id, members = line.strip().split('\t')
        print("Picking representative sequence for", pb_id, file=sys.stdout)
        best_id = None
        best_seq = None
        best_qual = None
        max_len = 0
        for x in members.split(','):
            if len(fd[x].seq) >= max_len:
                best_id = x
                best_seq = fd[x].seq
                best_qual = fd[x].letter_annotations['phred_quality'] if is_fq else None
                max_len = len(fd[x].seq)
        rep_info[pb_id] = (best_id, best_seq, best_qual)
        id_to_rep[best_id] = pb_id

    f_gff = open(gff_filename, 'w')
    coords = {}
    # temporary storage for the .1 record to write in conjunction with the second record
    record_storage = {}
    for r in BioReaders.GMAPSAMReader(sam_filename, True):
        if r.qID in id_to_rep:
            pb_id = id_to_rep[r.qID]
            # make coordinates & write the SAM file
            isoform_index = get_isoform_index(fusion_candidate_ranges[r.qID],
                                              r.sID, r.sStart, r.sEnd)
            if r.qID not in coords:
                coords[r.qID] = [None] * len(fusion_candidate_ranges[r.qID])
                record_storage[pb_id] = [None] * len(fusion_candidate_ranges[r.qID])
            coords[r.qID][isoform_index] = "{0}:{1}-{2}({3})".format(
                r.sID, r.sStart, r.sEnd, r.flag.strand)
            record_storage[pb_id][isoform_index] = r

    for pb_id, records in record_storage.items():
        for i, r in enumerate(records):
            isoform_index = i + 1
            f_gff.write("{chr}\tPacBio\ttranscript\t{s}\t{e}\t.\t{strand}\t.\t"
                        "gene_id \"{pi}\"; transcript_id \"{pi}.{j}\";\n".format(
                            chr=r.sID, s=r.segments[0].start + 1,
                            e=r.segments[-1].end, pi=pb_id,
                            j=isoform_index, strand=r.flag.strand))
            for s in r.segments:
                f_gff.write("{chr}\tPacBio\texon\t{s}\t{e}\t.\t{strand}\t.\t"
                            "gene_id \"{pi}\"; transcript_id \"{pi}.{j}\";\n".format(
                                chr=r.sID, s=s.start + 1, e=s.end, pi=pb_id,
                                j=isoform_index, strand=r.flag.strand))
    f_gff.close()

    for pb_id in rep_info:
        best_id, best_seq, best_qual = rep_info[pb_id]
        _id_ = "{0}|{1}|{2}".format(pb_id, "+".join(coords[best_id]), best_id)
        _seq_ = best_seq
        if is_fq:
            SeqIO.write(
                SeqRecord(_seq_, id=_id_,
                          letter_annotations={'phred_quality': best_qual}),
                fout, 'fastq')
        else:
            SeqIO.write(SeqRecord(_seq_, id=_id_), fout, 'fasta')
    fout.close()
def pick_rep(fa_fq_filename, sam_filename, gff_filename, group_filename,
             output_filename, is_fq=False, pick_least_err_instead=False):
    """
    For each group, select the representative record

    If is FASTA file (is_fq False) -- then always pick the longest one
    If is FASTQ file (is_fq True) -- then
        If pick_least_err_instead is True, pick the one w/ least number of expected base errors
        Else, pick the longest one
    """
    if is_fq:
        fd = LazyFastqReader(fa_fq_filename)
    else:
        fd = LazyFastaReader(fa_fq_filename)
    fout = open(output_filename, 'w')

    # for line in open(gff_filename):
    #     # ex: chr1 PacBio transcript 27567 29336 . - . gene_id "PBfusion.1"; transcript_id "PBfusion.1.1";
    #     raw = line.strip().split('\t')
    #     if raw[2] == 'transcript':
    #         # check if this is first or 2+ part of fusion
    #         tid = raw[-1].split('; ')[1].split()[1][1:-2]  # ex: tid = PBfusion.1.1
    #         gid = tid[:tid.rfind('.')]  # ex: gid = PBfusion.1
    #         if tid.endswith('.1'):
    #             coords[gid] = "{0}:{1}-{2}({3})".format(raw[0], raw[3], raw[4], raw[6])
    #         else:
    #             assert gid in coords
    #             coords[gid] += "+{0}:{1}-{2}({3})".format(raw[0], raw[3], raw[4], raw[6])

    rep_info = {}
    id_to_rep = {}
    for line in open(group_filename):
        pb_id, members = line.strip().split('\t')
        print("Picking representative sequence for", pb_id, file=sys.stderr)
        best_id = None
        best_seq = None
        best_qual = None
        best_err = 9999999
        err = 9999999
        max_len = 0
        for x in members.split(','):
            if is_fq and pick_least_err_instead:
                # expected number of base errors: sum of 10^(-Q/10) over Phred qualities
                err = sum(10**-(i / 10.) for i in fd[x].letter_annotations['phred_quality'])
            if (is_fq and pick_least_err_instead and err < best_err) or \
                    ((not is_fq or not pick_least_err_instead) and len(fd[x].seq) >= max_len):
                best_id = x
                best_seq = fd[x].seq
                if is_fq:
                    best_qual = fd[x].letter_annotations['phred_quality']
                    best_err = err
                max_len = len(fd[x].seq)
        rep_info[pb_id] = (best_id, best_seq, best_qual)
        id_to_rep[best_id] = pb_id

    f_gff = open(gff_filename, 'w')
    coords = {}
    # temporary storage for the .1 record to write in conjunction with the second record
    record_storage = {}
    for r in BioReaders.GMAPSAMReader(sam_filename, True):
        if r.qID in id_to_rep:
            pb_id = id_to_rep[r.qID]
            best_id, best_seq, best_qual = rep_info[pb_id]
            # make coordinates & write the SAM file
            if r.qID not in coords:
                # this is the .1 portion
                coords[r.qID] = "{0}:{1}-{2}({3})".format(r.sID, r.sStart, r.sEnd, r.flag.strand)
                isoform_index = 1
                record_storage[pb_id] = [r]
            else:
                # this is the .2 portion, or even .3, .4....! handle fusions with > 2 loci correctly
                coords[r.qID] += "+{0}:{1}-{2}({3})".format(r.sID, r.sStart, r.sEnd, r.flag.strand)
                record_storage[pb_id].append(r)

    for pb_id, records in record_storage.items():
        for i, r in enumerate(records):
            isoform_index = i + 1
            f_gff.write("{chr}\tPacBio\ttranscript\t{s}\t{e}\t.\t{strand}\t.\t"
                        "gene_id \"{pi}\"; transcript_id \"{pi}.{j}\";\n".format(
                            chr=r.sID, s=r.segments[0].start + 1,
                            e=r.segments[-1].end, pi=pb_id,
                            j=isoform_index, strand=r.flag.strand))
            for s in r.segments:
                f_gff.write("{chr}\tPacBio\texon\t{s}\t{e}\t.\t{strand}\t.\t"
                            "gene_id \"{pi}\"; transcript_id \"{pi}.{j}\";\n".format(
                                chr=r.sID, s=s.start + 1, e=s.end, pi=pb_id,
                                j=isoform_index, strand=r.flag.strand))
    f_gff.close()

    for pb_id in rep_info:
        best_id, best_seq, best_qual = rep_info[pb_id]
        _id_ = "{0}|{1}|{2}".format(pb_id, coords[best_id], best_id)
        _seq_ = best_seq
        if is_fq:
            SeqIO.write(
                SeqRecord(_seq_, id=_id_,
                          letter_annotations={'phred_quality': best_qual}),
                fout, 'fastq')
        else:
            SeqIO.write(SeqRecord(_seq_, id=_id_), fout, 'fasta')
    fout.close()