def compare_record(self, old, new, fmt=None, msg=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores, after
    first running the parent class comparison on the two records.
    The sequence itself is not compared here; that is left to the parent
    class method (presumably where UnknownSeq objects, i.e. QUAL files,
    are dealt with).

    Arguments:
     - old - the original record.
     - new - the record to compare against ``old``; where the two carry
       different score types, ``new`` is assumed to be the converted one.
     - fmt - format name used to pick the score cap applied before
       comparing: 62 for "fastq-solexa"/"fastq-illumina", 93 for
       "fastq"/"fastq-sanger", and no cap for "fasta", "qual", "phd",
       "sff", "tab" or None. Any other value fails the test.
     - msg - optional prefix added to the assertion failure messages.
    """
    # Forward msg so that failures raised by the parent's checks carry the
    # caller's context as well.
    super().compare_record(old, new, msg=msg)
    if fmt in ("fastq-solexa", "fastq-illumina"):
        truncate = 62
    elif fmt in ("fastq", "fastq-sanger"):
        truncate = 93
    else:
        # A unittest assertion rather than a bare assert, so the check
        # still runs under "python -O".
        self.assertIn(fmt, ("fasta", "qual", "phd", "sff", "tab", None))
        truncate = None

    def prefixed(text):
        """Return text with the caller's msg prepended, if one was given."""
        return text if msg is None else "%s: %s" % (msg, text)

    # Same score type on both sides: compare directly.
    for keyword in ("phred_quality", "solexa_quality"):
        q_old = old.letter_annotations.get(keyword)
        q_new = new.letter_annotations.get(keyword)
        if q_old is None or q_new is None:
            continue
        if truncate is not None and q_old != q_new:
            # Differences that only exist above the cap are tolerated.
            q_old = [min(q, truncate) for q in q_old]
            q_new = [min(q, truncate) for q in q_new]
        self.assertEqual(q_old, q_new, msg=prefixed("mismatch in %s" % keyword))

    q_old = old.letter_annotations.get("phred_quality")
    q_new = new.letter_annotations.get("solexa_quality")
    if q_old is not None and q_new is not None:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [round(QualityIO.solexa_quality_from_phred(q)) for q in q_old]
        if truncate is not None:
            converted = [min(q, truncate) for q in converted]
        # Wrap q_old in a tuple so a tuple of scores is formatted as one value.
        err_msg = prefixed(
            "mismatch converting phred_quality %s to solexa_quality" % (q_old,)
        )
        self.assertEqual(converted, q_new, msg=err_msg)

    q_old = old.letter_annotations.get("solexa_quality")
    q_new = new.letter_annotations.get("phred_quality")
    if q_old is not None and q_new is not None:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [round(QualityIO.phred_quality_from_solexa(q)) for q in q_old]
        if truncate is not None:
            converted = [min(q, truncate) for q in converted]
        err_msg = prefixed(
            "mismatch converting solexa_quality %s to phred_quality" % (q_old,)
        )
        self.assertEqual(converted, q_new, msg=err_msg)
 def test_sanger_to_solexa(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Solexa (-5 to 62)"""
     # The writer does not call solexa_quality_from_phred directly; for
     # speed it goes through a cached dictionary of the mappings, so this
     # round trip checks that the cache agrees with the function.
     phred_scores = range(0, 94)
     seq = "N" * 94
     qual = "".join([chr(33 + score) for score in phred_scores])
     expected_sol = []
     for score in phred_scores:
         solexa = int(round(QualityIO.solexa_quality_from_phred(score)))
         expected_sol.append(min(62, solexa))
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     out_handle = StringIO()
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always", BiopythonWarning)
         records = SeqIO.parse(in_handle, "fastq-sanger")
         SeqIO.write(records, out_handle, "fastq-solexa")
         # At most one warning is tolerated from the conversion.
         self.assertTrue(len(caught) <= 1, caught)
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-solexa")
     self.assertEqual(str(record.seq), seq)
     observed = record.letter_annotations["solexa_quality"]
     self.assertEqual(observed, expected_sol)
 def test_sanger_to_solexa(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Solexa (-5 to 62)"""
     # What is under test here is the writer's cached dictionary of
     # PHRED to Solexa mappings, which it uses (for speed) instead of
     # calling solexa_quality_from_phred directly.
     count = 94
     seq = "N" * count
     # chr(33 + q) for q = 0..93, i.e. the characters 33..126.
     qual = "".join(map(chr, range(33, 33 + count)))
     expected_sol = [
         min(62, int(round(QualityIO.solexa_quality_from_phred(phred))))
         for phred in range(count)
     ]
     fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
     in_handle = StringIO(fastq_text)
     out_handle = StringIO()
     with warnings.catch_warnings(record=True) as w:
         warnings.simplefilter("always", BiopythonWarning)
         SeqIO.write(
             SeqIO.parse(in_handle, "fastq-sanger"), out_handle, "fastq-solexa"
         )
         self.assertTrue(len(w) <= 1, w)
     out_handle.seek(0)
     converted = SeqIO.read(out_handle, "fastq-solexa")
     self.assertEqual(str(converted.seq), seq)
     self.assertEqual(converted.letter_annotations["solexa_quality"], expected_sol)
 def test_sanger_to_solexa(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Solexa (-5 to 62)"""
     # The point of this test is the writing code doesn't actually use the
     # solexa_quality_from_phred function directly. For speed it uses a
     # cached dictionary of the mappings.
     seq = "N" * 94
     qual = "".join(chr(33 + q) for q in range(0, 94))
     expected_sol = [
         min(62, int(round(QualityIO.solexa_quality_from_phred(q))))
         for q in range(0, 94)
     ]
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     out_handle = StringIO("")
     # Want to ignore the data loss warning. catch_warnings restores the
     # previous filter list on exit, even if the write raises. Undoing the
     # filter by hand with warnings.filters.pop() is wrong: simplefilter
     # inserts at the front of the list while pop() removes the last entry.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", BiopythonWarning)
         SeqIO.write(
             SeqIO.parse(in_handle, "fastq-sanger"), out_handle, "fastq-solexa"
         )
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-solexa")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["solexa_quality"], expected_sol)
 def test_sanger_to_solexa(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Solexa (-5 to 62)"""
     # The point of this test is the writing code doesn't actually use the
     # solexa_quality_from_phred function directly. For speed it uses a
     # cached dictionary of the mappings.
     seq = "N" * 94
     qual = "".join(chr(33 + q) for q in range(0, 94))
     expected_sol = [
         min(62, int(round(QualityIO.solexa_quality_from_phred(q))))
         for q in range(0, 94)
     ]
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     out_handle = StringIO("")
     # Want to ignore the data loss warning. The filter is scoped with
     # catch_warnings so the previous filters come back on exit (even if
     # the write raises); warnings.resetwarnings() would instead discard
     # every filter, including ones installed by the test runner or -W.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         SeqIO.write(
             SeqIO.parse(in_handle, "fastq-sanger"), out_handle, "fastq-solexa"
         )
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-solexa")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["solexa_quality"], expected_sol)
    def compare_record(self, old, new, truncate, msg):
        """Quality aware SeqRecord comparison.

        Delegates the basic record comparison to the parent class, then
        checks the per-letter quality scores: like against like, and
        PHRED against Solexa through the QualityIO conversion functions.

        Arguments:
         - old - the original record.
         - new - the record to compare with ``old``; when the score types
           differ, ``new`` is taken to be the converted record.
         - truncate - if true, scores are capped at this value before
           being compared.
         - msg - prefix used in the assertion failure messages.
        """
        super().compare_record(old, new, msg=msg)

        def capped(scores):
            """Return scores capped at truncate, or unchanged if no cap is set."""
            if truncate:
                return [min(score, truncate) for score in scores]
            return scores

        old_scores = old.letter_annotations
        new_scores = new.letter_annotations

        # Same score type present on both records.
        for keyword in ("phred_quality", "solexa_quality"):
            before = old_scores.get(keyword)
            after = new_scores.get(keyword)
            if before is not None and after is not None:
                if truncate and before != after:
                    before = capped(before)
                    after = capped(after)
                err_msg = "%s: mismatch in %s" % (msg, keyword)
                self.assertEqual(before, after, msg=err_msg)

        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        phred = old_scores.get("phred_quality")
        solexa = new_scores.get("solexa_quality")
        if not (phred is None or solexa is None):
            expected = capped(
                [round(QualityIO.solexa_quality_from_phred(score)) for score in phred]
            )
            err_msg = "%s: mismatch in phred_quality vs solexa_quality" % msg
            self.assertEqual(expected, solexa, msg=err_msg)

        # The same check in the other direction, with the same assumption.
        solexa = old_scores.get("solexa_quality")
        phred = new_scores.get("phred_quality")
        if not (solexa is None or phred is None):
            expected = capped(
                [round(QualityIO.phred_quality_from_solexa(score)) for score in solexa]
            )
            err_msg = "%s: mismatch in solexa_quality vs phred_quality" % msg
            self.assertEqual(expected, phred, msg=err_msg)
 def test_solexa_quality_from_phred(self):
     """Mapping check for function solexa_quality_from_phred"""
     # Expected rounded Solexa scores for PHRED 0 to 9 (list index = PHRED).
     low_scores = [-5, -5, -2, 0, 2, 3, 5, 6, 7, 8]
     for phred, solexa in enumerate(low_scores):
         self.assertEqual(solexa, round(QualityIO.solexa_quality_from_phred(phred)))
     # From PHRED 10 upwards the rounded values are expected to coincide.
     for phred in range(10, 100):
         self.assertEqual(phred, round(QualityIO.solexa_quality_from_phred(phred)))
def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old - the original record.
     - new - the record to compare with ``old``; if the two hold different
       score types, ``new`` is assumed to be the converted one.
     - truncate - optional cap; when given (and non-zero), scores are
       capped at this value before being compared.

    Returns True if the records agree; otherwise raises ValueError
    describing the first difference found.
    """
    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    # The new description may legitimately be "<id> <description>".
    if (
        old.description != new.description
        and (old.id + " " + old.description).strip() != new.description
    ):
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    unknown = isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq)
    if not unknown and str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        # Keep the message readable for long sequences.
        raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))

    old_annot = old.letter_annotations
    new_annot = new.letter_annotations

    # Same score type on both records: compare directly, tolerating
    # differences that disappear once both sides are capped at truncate.
    for key in ("phred_quality", "solexa_quality"):
        if key not in old_annot or key not in new_annot:
            continue
        q_old = old_annot[key]
        q_new = new_annot[key]
        if q_old != q_new:
            same_when_capped = truncate and (
                [min(q, truncate) for q in q_old]
                == [min(q, truncate) for q in q_new]
            )
            if not same_when_capped:
                # Name the score type that actually differs.
                raise ValueError("Mismatch in %s" % key)

    if "phred_quality" in old_annot and "solexa_quality" in new_annot:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.solexa_quality_from_phred(q))
            for q in old_annot["phred_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new_annot["solexa_quality"]:
            # Dump the three score lists to help diagnose the mismatch.
            print()
            print(old_annot["phred_quality"])
            print(converted)
            print(new_annot["solexa_quality"])
            raise ValueError("Mismatch in phred_quality vs solexa_quality")

    if "solexa_quality" in old_annot and "phred_quality" in new_annot:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.phred_quality_from_solexa(q))
            for q in old_annot["solexa_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new_annot["phred_quality"]:
            print(old_annot["solexa_quality"])
            print(converted)
            print(new_annot["phred_quality"])
            raise ValueError("Mismatch in solexa_quality vs phred_quality")
    return True
Exemple #9
0
 def test_solexa_quality_from_phred(self):
     """Mapping check for function solexa_quality_from_phred"""
     # Expected rounded Solexa score for each PHRED score from 0 to 99:
     # the first ten are listed explicitly, the rest equal the PHRED score.
     expected = (-5, -5, -2, 0, 2, 3, 5, 6, 7, 8) + tuple(range(10, 100))
     for phred, solexa in enumerate(expected):
         observed = round(QualityIO.solexa_quality_from_phred(phred))
         self.assertEqual(solexa, observed)
Exemple #10
0
def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old - the original record.
     - new - the record to compare with ``old``; if the two hold different
       score types, ``new`` is assumed to be the converted one.
     - truncate - optional cap; when given (and non-zero), scores are
       capped at this value before being compared.

    Returns True if the records agree; otherwise raises ValueError
    describing the first difference found.
    """
    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    # The new description may legitimately be "<id> <description>".
    if (
        old.description != new.description
        and (old.id + " " + old.description).strip() != new.description
    ):
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    unknown = isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq)
    if not unknown and str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        # Keep the message readable for long sequences.
        raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))

    old_annot = old.letter_annotations
    new_annot = new.letter_annotations

    # Same score type on both records: compare directly, tolerating
    # differences that disappear once both sides are capped at truncate.
    for key in ("phred_quality", "solexa_quality"):
        if key not in old_annot or key not in new_annot:
            continue
        q_old = old_annot[key]
        q_new = new_annot[key]
        if q_old != q_new:
            same_when_capped = truncate and (
                [min(q, truncate) for q in q_old]
                == [min(q, truncate) for q in q_new]
            )
            if not same_when_capped:
                # Name the score type that actually differs.
                raise ValueError("Mismatch in %s" % key)

    if "phred_quality" in old_annot and "solexa_quality" in new_annot:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.solexa_quality_from_phred(q))
            for q in old_annot["phred_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new_annot["solexa_quality"]:
            # Dump the three score lists to help diagnose the mismatch.
            print()
            print(old_annot["phred_quality"])
            print(converted)
            print(new_annot["solexa_quality"])
            raise ValueError("Mismatch in phred_quality vs solexa_quality")

    if "solexa_quality" in old_annot and "phred_quality" in new_annot:
        # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
        # Assume "old" is the original, and "new" has been converted.
        converted = [
            round(QualityIO.phred_quality_from_solexa(q))
            for q in old_annot["solexa_quality"]
        ]
        if truncate:
            converted = [min(q, truncate) for q in converted]
        if converted != new_annot["phred_quality"]:
            print(old_annot["solexa_quality"])
            print(converted)
            print(new_annot["phred_quality"])
            raise ValueError("Mismatch in solexa_quality vs phred_quality")
    return True