def compare_record(self, old, new, fmt=None, msg=None):
        """Quality aware SeqRecord comparison.

        This will check the mapping between Solexa and PHRED scores.
        It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

        Arguments:
         - old - the original record.
         - new - the record to compare against it; where the two carry
           different quality types, ``new`` is assumed to have been
           converted from ``old``.
         - fmt - format name, used only to pick the maximum score that
           qualities are clipped to before comparing: 62 for "fastq-solexa"
           and "fastq-illumina", 93 for "fastq" and "fastq-sanger", and no
           clipping for "fasta", "qual", "phd", "sff", "tab" or None.
         - msg - optional prefix added to every assertion failure message.

        Fails (via the TestCase assertion methods) on any mismatch or on an
        unrecognised ``fmt``.
        """
        # Forward msg so failures raised by the parent comparison carry the
        # caller's context as well, rather than silently dropping it.
        super().compare_record(old, new, msg=msg)

        def prefixed(text):
            """Prepend the caller's msg (if any) to a failure message."""
            if msg is None:
                return text
            return "%s: %s" % (msg, text)

        if fmt in ("fastq-solexa", "fastq-illumina"):
            truncate = 62
        elif fmt in ("fastq", "fastq-sanger"):
            truncate = 93
        else:
            # A TestCase assertion (unlike a bare assert) still runs under -O.
            self.assertIn(
                fmt,
                ("fasta", "qual", "phd", "sff", "tab", None),
                msg=prefixed("unexpected format %r" % (fmt,)),
            )
            truncate = None

        # Same quality type on both sides: compare directly, tolerating
        # differences that vanish once both are clipped to the format maximum.
        for keyword in ("phred_quality", "solexa_quality"):
            q_old = old.letter_annotations.get(keyword)
            q_new = new.letter_annotations.get(keyword)
            if q_old is None or q_new is None:
                continue
            if truncate is not None and q_old != q_new:
                q_old = [min(q, truncate) for q in q_old]
                q_new = [min(q, truncate) for q in q_new]
            self.assertEqual(q_old, q_new, msg=prefixed("mismatch in %s" % keyword))

        q_old = old.letter_annotations.get("phred_quality")
        q_new = new.letter_annotations.get("solexa_quality")
        if q_old is not None and q_new is not None:
            # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
            # Assume "old" is the original, and "new" has been converted.
            converted = [
                round(QualityIO.solexa_quality_from_phred(q)) for q in q_old
            ]
            if truncate is not None:
                converted = [min(q, truncate) for q in converted]
            # Wrap q_old in a tuple so a tuple-valued annotation formats safely.
            err_msg = "mismatch converting phred_quality %s to solexa_quality" % (
                q_old,
            )
            self.assertEqual(converted, q_new, msg=prefixed(err_msg))

        q_old = old.letter_annotations.get("solexa_quality")
        q_new = new.letter_annotations.get("phred_quality")
        if q_old is not None and q_new is not None:
            # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
            # Assume "old" is the original, and "new" has been converted.
            converted = [
                round(QualityIO.phred_quality_from_solexa(q)) for q in q_old
            ]
            if truncate is not None:
                converted = [min(q, truncate) for q in converted]
            err_msg = "mismatch converting solexa_quality %s to phred_quality" % (
                q_old,
            )
            self.assertEqual(converted, q_new, msg=prefixed(err_msg))
 def test_solexa_to_sanger(self):
     """Check FASTQ Solexa scores (-5 to 62) written as Sanger (0 to 62)."""
     # This exercises the writer rather than the conversion function: the
     # writing code does not call solexa_quality_from_phred directly, it
     # uses a cached dictionary of the mappings for speed.
     solexa_scores = range(-5, 63)
     seq = "N" * 68
     qual = "".join(chr(64 + q) for q in solexa_scores)
     expected_phred = [
         round(QualityIO.phred_quality_from_solexa(q)) for q in solexa_scores
     ]

     # Round trip: parse the Solexa FASTQ text, write it as Sanger FASTQ,
     # then read the single converted record back.
     converted = StringIO()
     records = SeqIO.parse(StringIO(f"@Test\n{seq}\n+\n{qual}"), "fastq-solexa")
     SeqIO.write(records, converted, "fastq-sanger")
     converted.seek(0)
     record = SeqIO.read(converted, "fastq-sanger")

     self.assertEqual(record.seq, seq)
     self.assertEqual(record.letter_annotations["phred_quality"], expected_phred)
 def test_solexa_to_sanger(self):
     """Mapping check for FASTQ Solexa (-5 to 62) to Sanger (0 to 62)."""
     # The writing code is what is under test here: it does not call
     # solexa_quality_from_phred directly but looks scores up in a cached
     # dictionary of the mappings, for speed.
     scores = list(range(-5, 63))
     seq = "N" * 68
     qual = "".join([chr(64 + score) for score in scores])
     expected_phred = []
     for score in scores:
         expected_phred.append(round(QualityIO.phred_quality_from_solexa(score)))

     source = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     target = StringIO()
     SeqIO.write(SeqIO.parse(source, "fastq-solexa"), target, "fastq-sanger")

     # Rewind so the freshly written Sanger FASTQ can be read back.
     target.seek(0)
     record = SeqIO.read(target, "fastq-sanger")
     observed_phred = record.letter_annotations["phred_quality"]
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(observed_phred, expected_phred)
 def test_solexa_to_sanger(self):
     """Mapping check for FASTQ Solexa (-5 to 62) to Sanger (0 to 62)."""
     # The point of this test is the writing code doesn't actually use the
     # solexa_quality_from_phred function directly. For speed it uses a
     # cached dictionary of the mappings.
     seq = "N" * 68
     qual = "".join(chr(64 + q) for q in range(-5, 63))
     expected_phred = [
         round(QualityIO.phred_quality_from_solexa(q)) for q in range(-5, 63)
     ]
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     out_handle = StringIO("")
     # Ignore the data loss warning, but only while writing. The context
     # manager restores the previous filter state on exit (even if the write
     # raises), whereas warnings.resetwarnings() would also discard any
     # filters installed elsewhere, e.g. by the test runner.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         SeqIO.write(
             SeqIO.parse(in_handle, "fastq-solexa"), out_handle, "fastq-sanger"
         )
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-sanger")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["phred_quality"], expected_phred)
    def compare_record(self, old, new, truncate, msg):
        """Compare two SeqRecord objects, including their quality scores.

        After the parent class comparison, quality scores of the same type
        must match (optionally after clipping both to ``truncate``), and
        where ``old`` and ``new`` hold different quality types, ``new`` must
        equal the rounded Solexa/PHRED conversion of ``old``.

        Arguments:
         - old - the original record.
         - new - the record expected to be equivalent to ``old``.
         - truncate - maximum score to clip to before comparing; a false
           value disables clipping.
         - msg - prefix used in the assertion failure messages.
        """
        super().compare_record(old, new, msg=msg)
        old_annotations = old.letter_annotations
        new_annotations = new.letter_annotations

        # Like-for-like comparison of each quality type present on both sides.
        for keyword in ("phred_quality", "solexa_quality"):
            before = old_annotations.get(keyword)
            after = new_annotations.get(keyword)
            if before is not None and after is not None:
                if truncate and before != after:
                    before = [min(q, truncate) for q in before]
                    after = [min(q, truncate) for q in after]
                self.assertEqual(
                    before, after, msg="%s: mismatch in %s" % (msg, keyword)
                )

        # Cross-type comparison. The mapping is lossy in both directions, so
        # "old" is taken as the original and "new" as the converted record.
        conversions = (
            (
                "phred_quality",
                "solexa_quality",
                QualityIO.solexa_quality_from_phred,
                "%s: mismatch in phred_quality vs solexa_quality",
            ),
            (
                "solexa_quality",
                "phred_quality",
                QualityIO.phred_quality_from_solexa,
                "%s: mismatch in solexa_quality vs phred_quality",
            ),
        )
        for source_key, target_key, convert, template in conversions:
            source_scores = old_annotations.get(source_key)
            target_scores = new_annotations.get(target_key)
            if source_scores is None or target_scores is None:
                continue
            expected = [round(convert(q)) for q in source_scores]
            if truncate:
                expected = [min(q, truncate) for q in expected]
            self.assertEqual(expected, target_scores, msg=template % msg)
 def test_phred_quality_from_solexa(self):
     """Mapping check for function phred_quality_from_solexa."""
     # Expected rounded PHRED scores for Solexa scores -5, -4, ..., 9.
     low_range_expected = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10]
     for solexa, phred in zip(range(-5, 10), low_range_expected):
         self.assertEqual(phred, round(QualityIO.phred_quality_from_solexa(solexa)))
     # From 10 up to 99 the rounded result is expected to equal the input.
     for solexa in range(10, 100):
         self.assertEqual(solexa, round(QualityIO.phred_quality_from_solexa(solexa)))
def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old - the original record (needs ``id``, ``description``, ``seq`` and
       ``letter_annotations``).
     - new - the record to compare against it; where the two carry different
       quality types, ``new`` is assumed to have been converted from ``old``.
     - truncate - optional maximum score; when given, qualities are clipped
       to it before being compared.

    Returns True if the records match, and raises ValueError otherwise.
    """
    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    # The new description may legitimately be "<id> <description>".
    if (
        old.description != new.description
        and (old.id + " " + old.description).strip() != new.description
    ):
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    if isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq):
        # Nothing to compare letter by letter; the length check above suffices.
        pass
    elif str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        else:
            raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))

    # Same quality type on both sides: must match exactly, or match once both
    # are clipped to the truncate value.
    for keyword in ("phred_quality", "solexa_quality"):
        if (
            keyword not in old.letter_annotations
            or keyword not in new.letter_annotations
        ):
            continue
        q_old = old.letter_annotations[keyword]
        q_new = new.letter_annotations[keyword]
        if q_old == q_new:
            continue
        if truncate and [min(q, truncate) for q in q_old] == [
            min(q, truncate) for q in q_new
        ]:
            continue
        # Name the annotation that actually differs.
        raise ValueError("Mismatch in %s" % keyword)

    if (
        "phred_quality" in old.letter_annotations
        and "solexa_quality" in new.letter_annotations
    ):
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.solexa_quality_from_phred(q))
            for q in old.letter_annotations["phred_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new.letter_annotations["solexa_quality"]:
            # Dump the three score lists to help diagnose the failure.
            print(old.letter_annotations["phred_quality"])
            print(converted)
            print(new.letter_annotations["solexa_quality"])
            raise ValueError("Mismatch in phred_quality vs solexa_quality")
    if (
        "solexa_quality" in old.letter_annotations
        and "phred_quality" in new.letter_annotations
    ):
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.phred_quality_from_solexa(q))
            for q in old.letter_annotations["solexa_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new.letter_annotations["phred_quality"]:
            print(old.letter_annotations["solexa_quality"])
            print(converted)
            print(new.letter_annotations["phred_quality"])
            raise ValueError("Mismatch in solexa_quality vs phred_quality")
    return True
Exemple #8
0
 def test_phred_quality_from_solexa(self):
     """Check phred_quality_from_solexa against the expected rounded scores."""
     convert = QualityIO.phred_quality_from_solexa
     # Position k in this tuple is the expected PHRED score for Solexa
     # score k - 5, i.e. it covers Solexa scores -5 through 9.
     expected_low = (1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10)
     for solexa, phred in enumerate(expected_low, start=-5):
         self.assertEqual(phred, round(convert(solexa)))
     # For 10 to 99 the rounded PHRED score should be the Solexa score itself.
     for score in range(10, 100):
         self.assertEqual(score, round(convert(score)))
Exemple #9
0
def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old - the original record (needs ``id``, ``description``, ``seq`` and
       ``letter_annotations``).
     - new - the record to compare against it; where the two carry different
       quality types, ``new`` is assumed to have been converted from ``old``.
     - truncate - optional maximum score; when given, qualities are clipped
       to it before being compared.

    Returns True if the records match, and raises ValueError otherwise.
    """

    def clipped(scores):
        """Return the scores capped at truncate."""
        return [min(q, truncate) for q in scores]

    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    # The new description may legitimately be "<id> <description>".
    if (
        old.description != new.description
        and (old.id + " " + old.description).strip() != new.description
    ):
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    if isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq):
        # Nothing to compare letter by letter; the length check above suffices.
        pass
    elif str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        else:
            raise ValueError("'%s...' vs '%s...'" %
                             (old.seq[:100], new.seq[:100]))

    old_quals = old.letter_annotations
    new_quals = new.letter_annotations

    # Same quality type on both sides: must match exactly, or match once both
    # are clipped to the truncate value.
    for keyword in ("phred_quality", "solexa_quality"):
        if keyword in old_quals and keyword in new_quals:
            q_old = old_quals[keyword]
            q_new = new_quals[keyword]
            if q_old != q_new and not (
                truncate and clipped(q_old) == clipped(q_new)
            ):
                # Name the annotation that actually differs.
                raise ValueError("Mismatch in %s" % keyword)

    if "phred_quality" in old_quals and "solexa_quality" in new_quals:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.solexa_quality_from_phred(q))
            for q in old_quals["phred_quality"]
        ]
        if truncate:
            converted = clipped(converted)
        if converted != new_quals["solexa_quality"]:
            # Dump the three score lists to help diagnose the failure.
            print(old_quals["phred_quality"])
            print(converted)
            print(new_quals["solexa_quality"])
            raise ValueError("Mismatch in phred_quality vs solexa_quality")
    if "solexa_quality" in old_quals and "phred_quality" in new_quals:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.phred_quality_from_solexa(q))
            for q in old_quals["solexa_quality"]
        ]
        if truncate:
            converted = clipped(converted)
        if converted != new_quals["phred_quality"]:
            print(old_quals["solexa_quality"])
            print(converted)
            print(new_quals["phred_quality"])
            raise ValueError("Mismatch in solexa_quality vs phred_quality")
    return True