def compare_record(self, old, new, fmt=None, msg=None):
    """Quality aware SeqRecord comparison.

    The basic record comparison is delegated to the parent class. This
    method then checks the per-letter quality scores, including the
    mapping between Solexa and PHRED scores when the two records carry
    different score types.

    Arguments:
     - old - the original record.
     - new - the record to check against it (assumed to be the converted
       one when the score types differ).
     - fmt - name of the format ``new`` went through; selects the cap
       applied to scores before comparing (62 for "fastq-solexa" and
       "fastq-illumina", 93 for "fastq" and "fastq-sanger", no cap for
       "fasta", "qual", "phd", "sff", "tab" or None).
     - msg - optional prefix for the failure messages.

    Fails the test (via the TestCase assertion methods) on any mismatch
    or on an unrecognised ``fmt``.
    """
    # Forward the caller's message so parent-class failures keep their context.
    super().compare_record(old, new, msg=msg)

    if fmt in ("fastq-solexa", "fastq-illumina"):
        truncate = 62
    elif fmt in ("fastq", "fastq-sanger"):
        truncate = 93
    else:
        # A real assertion method rather than a bare ``assert`` so that the
        # check still runs under ``python -O``.
        self.assertIn(fmt, ("fasta", "qual", "phd", "sff", "tab", None))
        truncate = None

    def with_prefix(text):
        # Prepend the caller supplied message, if there is one.
        if msg is not None:
            return "%s: %s" % (msg, text)
        return text

    # Same kind of score on both sides: compare directly, allowing for the
    # format's cap only when the raw values differ.
    for keyword in ("phred_quality", "solexa_quality"):
        q_old = old.letter_annotations.get(keyword)
        q_new = new.letter_annotations.get(keyword)
        if q_old is None or q_new is None:
            continue
        if truncate is not None and q_old != q_new:
            q_old = [min(q, truncate) for q in q_old]
            q_new = [min(q, truncate) for q in q_new]
        self.assertEqual(q_old, q_new, msg=with_prefix("mismatch in %s" % keyword))

    # Different kinds of score: mapping from Solexa to PHRED is lossy, but so
    # is PHRED to Solexa. Assume "old" is the original, and "new" has been
    # converted.
    conversions = (
        ("phred_quality", "solexa_quality", QualityIO.solexa_quality_from_phred),
        ("solexa_quality", "phred_quality", QualityIO.phred_quality_from_solexa),
    )
    for source, target, convert in conversions:
        q_old = old.letter_annotations.get(source)
        q_new = new.letter_annotations.get(target)
        if q_old is None or q_new is None:
            continue
        converted = [round(convert(q)) for q in q_old]
        if truncate is not None:
            converted = [min(q, truncate) for q in converted]
        err_msg = "mismatch converting %s %s to %s" % (source, q_old, target)
        self.assertEqual(converted, q_new, msg=with_prefix(err_msg))
def test_solexa_to_sanger(self):
    """Check Solexa scores (-5 to 62) are mapped correctly when written as Sanger FASTQ."""
    # Per the original note, the writing code works from a cached dictionary
    # of mappings rather than calling the conversion function itself, so the
    # written output is compared here against the function's results.
    solexa_scores = range(-5, 63)
    seq = "N" * 68
    qual = "".join([chr(64 + score) for score in solexa_scores])

    # Round trip: parse as Solexa FASTQ, write as Sanger FASTQ, read it back.
    source = StringIO(f"@Test\n{seq}\n+\n{qual}")
    sink = StringIO()
    solexa_records = SeqIO.parse(source, "fastq-solexa")
    SeqIO.write(solexa_records, sink, "fastq-sanger")
    sink.seek(0)
    converted = SeqIO.read(sink, "fastq-sanger")

    self.assertEqual(converted.seq, seq)
    phred_expected = [
        round(QualityIO.phred_quality_from_solexa(score)) for score in solexa_scores
    ]
    self.assertEqual(converted.letter_annotations["phred_quality"], phred_expected)
def test_solexa_to_sanger(self):
    """Check the Solexa (-5 to 62) to Sanger quality mapping used when writing FASTQ."""
    # Per the original note, the writer relies on a cached dictionary of
    # mappings instead of calling the conversion function directly; this test
    # confirms the written scores agree with that function.
    solexa_range = list(range(-5, 63))
    seq = "N" * 68
    qual = "".join(map(chr, [64 + value for value in solexa_range]))
    phred_from_function = []
    for value in solexa_range:
        phred_from_function.append(round(QualityIO.phred_quality_from_solexa(value)))

    fastq_in = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    fastq_out = StringIO()
    SeqIO.write(SeqIO.parse(fastq_in, "fastq-solexa"), fastq_out, "fastq-sanger")

    # Rewind and re-read what was written, this time as Sanger FASTQ.
    fastq_out.seek(0)
    sanger_record = SeqIO.read(fastq_out, "fastq-sanger")
    self.assertEqual(str(sanger_record.seq), seq)
    self.assertEqual(
        sanger_record.letter_annotations["phred_quality"], phred_from_function
    )
def test_solexa_to_sanger(self):
    """Mapping check for FASTQ Solexa (-5 to 62) to Sanger (0 to 62).

    Writes a record holding every Solexa score from -5 to 62 as Sanger
    FASTQ, reads it back, and checks the PHRED scores equal the rounded
    output of QualityIO.phred_quality_from_solexa.
    """
    # The point of this test is the writing code doesn't actually use the
    # conversion function directly. For speed it uses a cached dictionary
    # of the mappings.
    seq = "N" * 68
    qual = "".join(chr(64 + q) for q in range(-5, 63))
    expected_phred = [
        round(QualityIO.phred_quality_from_solexa(q)) for q in range(-5, 63)
    ]
    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO("")
    # Want to ignore the data loss warning. The context manager restores the
    # previous warning filters on exit (even if the write raises), instead of
    # wiping every filter with warnings.resetwarnings().
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        SeqIO.write(SeqIO.parse(in_handle, "fastq-solexa"), out_handle, "fastq-sanger")
    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-sanger")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"], expected_phred)
def compare_record(self, old, new, truncate, msg):
    """Compare two SeqRecords, including their per-letter quality scores.

    The generic record comparison is delegated to the parent class. This
    method then checks the quality annotations:

    - where both records hold the same kind of score ("phred_quality" or
      "solexa_quality"), the two lists must match, optionally after capping
      every score at ``truncate``;
    - where ``old`` holds one kind and ``new`` the other, the scores of
      ``old`` are converted with the matching QualityIO function, rounded,
      optionally capped, and must equal the scores of ``new``.

    ``msg`` is used as the prefix of every failure message. A false
    ``truncate`` (e.g. None) disables capping.
    """
    super().compare_record(old, new, msg=msg)

    def capped(scores):
        # Clip each score to the ceiling given by ``truncate``.
        return [min(score, truncate) for score in scores]

    old_scores = old.letter_annotations
    new_scores = new.letter_annotations

    # Like-for-like comparison; capping is only tried when the raw lists differ.
    for keyword in ("phred_quality", "solexa_quality"):
        before = old_scores.get(keyword)
        after = new_scores.get(keyword)
        if before is not None and after is not None:
            if truncate and before != after:
                before, after = capped(before), capped(after)
            self.assertEqual(before, after, msg="%s: mismatch in %s" % (msg, keyword))

    # Cross comparison. Both conversion directions are lossy, so "old" is
    # taken to be the original and "new" the converted record.
    cross_checks = (
        (
            "phred_quality",
            "solexa_quality",
            QualityIO.solexa_quality_from_phred,
            "%s: mismatch in phred_quality vs solexa_quality",
        ),
        (
            "solexa_quality",
            "phred_quality",
            QualityIO.phred_quality_from_solexa,
            "%s: mismatch in solexa_quality vs phred_quality",
        ),
    )
    for source_key, target_key, convert, template in cross_checks:
        original = old_scores.get(source_key)
        observed = new_scores.get(target_key)
        if original is None or observed is None:
            continue
        expected = [round(convert(score)) for score in original]
        if truncate:
            expected = capped(expected)
        self.assertEqual(expected, observed, msg=template % msg)
def test_phred_quality_from_solexa(self):
    """Check the rounded output of phred_quality_from_solexa for Solexa -5 to 99."""
    to_phred = QualityIO.phred_quality_from_solexa
    # Expected rounded PHRED values for Solexa scores -5, -4, ..., 9 in order.
    low_end = (1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10)
    for solexa, phred in enumerate(low_end, start=-5):
        self.assertEqual(phred, round(to_phred(solexa)))
    # From 10 upwards the rounded value is expected to equal the input.
    for score in range(10, 100):
        self.assertEqual(score, round(to_phred(score)))
def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old - the original record.
     - new - the record to check against it (assumed to be the converted
       one when the two records hold different kinds of quality score).
     - truncate - optional cap; when true, scores are clipped to this
       value before a mismatch is reported.

    Returns True if the records agree, and raises ValueError describing
    the first difference found otherwise.
    """
    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    # The new description may be the old one with the identifier prepended.
    if (
        old.description != new.description
        and (old.id + " " + old.description).strip() != new.description
    ):
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    if isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq):
        # Only the lengths are compared when either sequence is an UnknownSeq.
        pass
    elif str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))

    # Same kind of score in both records: must match, if need be after capping.
    for key in ("phred_quality", "solexa_quality"):
        if key not in old.letter_annotations or key not in new.letter_annotations:
            continue
        q_old = old.letter_annotations[key]
        q_new = new.letter_annotations[key]
        if q_old == q_new:
            continue
        if truncate and [min(q, truncate) for q in q_old] == [
            min(q, truncate) for q in q_new
        ]:
            continue
        # Report the annotation that actually differs.
        raise ValueError("Mismatch in %s" % key)

    # Different kinds of score: mapping from Solexa to PHRED is lossy, but so
    # is PHRED to Solexa. Assume "old" is the original, and "new" has been
    # converted.
    conversions = (
        ("phred_quality", "solexa_quality", QualityIO.solexa_quality_from_phred),
        ("solexa_quality", "phred_quality", QualityIO.phred_quality_from_solexa),
    )
    for source, target, convert in conversions:
        if source not in old.letter_annotations:
            continue
        if target not in new.letter_annotations:
            continue
        converted = [round(convert(q)) for q in old.letter_annotations[source]]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new.letter_annotations[target]:
            # Show the original, expected and actual scores to aid debugging.
            print(old.letter_annotations[source])
            print(converted)
            print(new.letter_annotations[target])
            raise ValueError("Mismatch in %s vs %s" % (source, target))
    return True
def test_phred_quality_from_solexa(self):
    """Verify phred_quality_from_solexa against known rounded values."""
    # Solexa scores -5 to 9 paired with the PHRED value each should round to.
    expected_low = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10]
    for solexa, phred in zip(range(-5, 10), expected_low):
        result = round(QualityIO.phred_quality_from_solexa(solexa))
        self.assertEqual(phred, result)
    # For 10 to 99 the rounded result is expected to equal the input.
    solexa = 10
    while solexa < 100:
        result = round(QualityIO.phred_quality_from_solexa(solexa))
        self.assertEqual(solexa, result)
        solexa += 1
def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old - the original record.
     - new - the record to check against it (assumed to be the converted
       one when the two records hold different kinds of quality score).
     - truncate - optional cap; when true, scores are clipped to this
       value before a mismatch is reported.

    Returns True if the records agree, and raises ValueError describing
    the first difference found otherwise.
    """
    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    # The new description may be the old one with the identifier prepended.
    if (
        old.description != new.description
        and (old.id + " " + old.description).strip() != new.description
    ):
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    if isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq):
        # Only the lengths are compared when either sequence is an UnknownSeq.
        pass
    elif str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))

    old_scores = old.letter_annotations
    new_scores = new.letter_annotations

    # Same kind of score in both records: must match, if need be after capping.
    for key in ("phred_quality", "solexa_quality"):
        if key in old_scores and key in new_scores and old_scores[key] != new_scores[key]:
            same_after_cap = truncate and [min(q, truncate) for q in old_scores[key]] == [
                min(q, truncate) for q in new_scores[key]
            ]
            if not same_after_cap:
                # Report the annotation that actually differs.
                raise ValueError("Mismatch in %s" % key)

    # Different kinds of score: mapping from Solexa to PHRED is lossy, but so
    # is PHRED to Solexa. Assume "old" is the original, and "new" has been
    # converted.
    if "phred_quality" in old_scores and "solexa_quality" in new_scores:
        converted = [
            round(QualityIO.solexa_quality_from_phred(q))
            for q in old_scores["phred_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new_scores["solexa_quality"]:
            # Show the original, expected and actual scores to aid debugging.
            print(old_scores["phred_quality"])
            print(converted)
            print(new_scores["solexa_quality"])
            raise ValueError("Mismatch in phred_quality vs solexa_quality")
    if "solexa_quality" in old_scores and "phred_quality" in new_scores:
        converted = [
            round(QualityIO.phred_quality_from_solexa(q))
            for q in old_scores["solexa_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new_scores["phred_quality"]:
            # Show the original, expected and actual scores to aid debugging.
            print(old_scores["solexa_quality"])
            print(converted)
            print(new_scores["phred_quality"])
            raise ValueError("Mismatch in solexa_quality vs phred_quality")
    return True