def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Check AlignIO.convert against an explicit parse-then-write pass.

    The input is loaded with AlignIO.parse and written with AlignIO.write
    to obtain a reference count and reference text.  AlignIO.convert is
    then run twice, once given the input filename and once given an open
    input handle; each run must reproduce both the count and the text.
    A ValueError raised while writing or converting is recorded as a
    count of zero.
    """
    # Reference result: parse everything, then write it out.
    alignments = list(AlignIO.parse(in_filename, in_format, None, alphabet))
    reference = StringIO()
    try:
        expected_count = AlignIO.write(alignments, reference, out_format)
    except ValueError:
        expected_count = 0

    # First pass: convert() is handed the filename.
    converted = StringIO()
    try:
        converted_count = AlignIO.convert(
            in_filename, in_format, converted, out_format, alphabet)
    except ValueError:
        converted_count = 0
    assert expected_count == converted_count
    assert reference.getvalue() == converted.getvalue()

    # Second pass: convert() is handed an already open input handle.
    converted = StringIO()
    try:
        with open(in_filename) as source:
            converted_count = AlignIO.convert(
                source, in_format, converted, out_format, alphabet)
    except ValueError:
        converted_count = 0
    assert expected_count == converted_count
    assert reference.getvalue() == converted.getvalue()
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Compare AlignIO.convert output with AlignIO.parse plus AlignIO.write.

    A reference count and text are produced by parsing the input and
    writing the alignments.  AlignIO.convert must then give the same count
    and text both when passed the filename and when passed an open handle.
    ValueError from either route is treated as a count of zero.
    """
    # Build the reference with the two-step parse/write route.
    parsed = list(AlignIO.parse(in_filename, in_format, None, alphabet))
    reference = StringIO()
    try:
        reference_count = AlignIO.write(parsed, reference, out_format)
    except ValueError:
        reference_count = 0

    # convert(): filename in, handle out.
    result = StringIO()
    try:
        result_count = AlignIO.convert(
            in_filename, in_format, result, out_format, alphabet)
    except ValueError:
        result_count = 0
    assert reference_count == result_count
    assert reference.getvalue() == result.getvalue()

    # convert(): handle in, handle out.
    result = StringIO()
    try:
        with open(in_filename) as in_handle:
            result_count = AlignIO.convert(
                in_handle, in_format, result, out_format, alphabet)
    except ValueError:
        result_count = 0
    assert reference_count == result_count
    assert reference.getvalue() == result.getvalue()
Beispiel #3
0
    def simple_check(self, base_name, in_variant):
        """Convert one FASTQ example to each variant and compare with file.

        The input is Quality/<base_name>_original_<in_variant>.fastq.  For
        each of the sanger, solexa and illumina output variants the
        reference text is read from Quality/<base_name>_as_<variant>.fastq
        and must equal the output of SeqIO.convert and also the output of
        SeqIO.parse followed by SeqIO.write.
        """
        source_path = "Quality/%s_original_%s.fastq" % (base_name, in_variant)
        for out_variant in ("sanger", "solexa", "illumina"):
            self.assertTrue(os.path.isfile(source_path))
            # Load the reference output...
            reference_path = "Quality/%s_as_%s.fastq" % (base_name,
                                                         out_variant)
            with open(reference_path, _universal_read_mode) as reference:
                expected = reference.read()

            with warnings.catch_warnings():
                if out_variant != "sanger":
                    # Ignore data loss warnings from max qualities
                    for category in (BiopythonWarning, UserWarning):
                        warnings.simplefilter("ignore", category)
                source_format = "fastq-" + in_variant
                target_format = "fastq-" + out_variant
                # One-step route: convert straight from file to handle.
                converted = StringIO()
                SeqIO.convert(source_path, source_format,
                              converted, target_format)
                self.assertEqual(expected, converted.getvalue())
                # Two-step route: parse the records, then write them.
                rewritten = StringIO()
                records = SeqIO.parse(source_path, source_format)
                SeqIO.write(records, rewritten, target_format)
                self.assertEqual(expected, rewritten.getvalue())
    def simple_check(self, base_name, in_variant):
        """Check FASTQ variant conversion of one example against references.

        Reads Quality/<base_name>_original_<in_variant>.fastq and, for the
        sanger, solexa and illumina targets in turn, requires that both
        SeqIO.convert and SeqIO.parse plus SeqIO.write reproduce the text
        stored in Quality/<base_name>_as_<variant>.fastq.
        """
        input_path = "Quality/%s_original_%s.fastq" % (base_name, in_variant)
        for out_variant in ("sanger", "solexa", "illumina"):
            self.assertTrue(os.path.isfile(input_path))
            # Load the reference output...
            expected_path = "Quality/%s_as_%s.fastq" % (base_name,
                                                        out_variant)
            with open(expected_path, _universal_read_mode) as expected_file:
                expected = expected_file.read()

            with warnings.catch_warnings():
                if out_variant != "sanger":
                    # Ignore data loss warnings from max qualities
                    for category in (BiopythonWarning, UserWarning):
                        warnings.simplefilter("ignore", category)
                in_format = "fastq-" + in_variant
                out_format = "fastq-" + out_variant
                # Check matches using convert...
                via_convert = StringIO()
                SeqIO.convert(input_path, in_format, via_convert, out_format)
                self.assertEqual(expected, via_convert.getvalue())
                # Check matches using parse/write
                via_write = StringIO()
                parsed = SeqIO.parse(input_path, in_format)
                SeqIO.write(parsed, via_write, out_format)
                self.assertEqual(expected, via_write.getvalue())
Beispiel #5
0
    def test_TaggingConsumer(self):
        """TaggingConsumer writes section markers and tagged lines."""
        out = StringIO()
        consumer = ParserSupport.TaggingConsumer(handle=out, colwidth=5)

        consumer.start_section()
        self.assertEqual(out.getvalue(), "***** start_section\n")

        # Empty the buffer so only the next call's output is compared.
        out.seek(0)
        out.truncate(0)
        consumer.test1("myline")
        self.assertEqual(out.getvalue(), "test1: myline\n")

        out.seek(0)
        out.truncate(0)
        consumer.end_section()
        self.assertEqual(out.getvalue(), "***** end_section\n")
    def test_widget(self):
        """Try widget derived functionality.

        Exercises dumpProperties, getProperties and setProperties on a
        ChromosomeSegment.
        """
        test_widget = BasicChromosome.ChromosomeSegment()

        expected_string = "chr_percent = 0.25"

        # Capture what dumpProperties sends to sys.stdout by swapping in a
        # buffer.  The finally clause puts the real stdout back even when
        # dumpProperties raises; without it the replacement would stay in
        # place for everything that runs afterwards.
        save_stdout = sys.stdout
        new_stdout = StringIO()
        sys.stdout = new_stdout
        try:
            test_widget.dumpProperties()
        finally:
            sys.stdout = save_stdout
        properties = new_stdout.getvalue()

        self.assertIn(expected_string, properties,
                      "Unexpected results from dumpProperties: \n %s"
                      % properties)

        properties = test_widget.getProperties()
        self.assertEqual(properties["label_size"], 6,
                         "Unexpected results from getProperties: %s"
                         % properties)

        test_widget.setProperties({"start_x_position": 12})
        self.assertEqual(test_widget.start_x_position, 12,
                         "setProperties doesn't seem to work right: %s"
                         % test_widget.start_x_position)
 def test_fastq_1000(self):
     """Round-trip one FASTQ record holding a mixed case 1000bp read."""
     title = "id descr goes here"
     sequence = "ACGTNncgta" * 100
     quality = "abcd!!efgh" * 100
     data = "@%s\n%s\n+\n%s\n" % (title, sequence, quality)
     output = StringIO()
     records = SeqIO.parse(StringIO(data), "fastq")
     written = SeqIO.write(records, output, "fastq")
     self.assertEqual(1, written)
     self.assertEqual(data, output.getvalue())
Beispiel #8
0
    def test_write(self):
        """EnrichmentWriter output for two entries matches the known text."""
        first = EnrichmentEntry("9951", "structure-specific DNA binding",
                                0.032301032301)
        first.corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}
        first.attrs = {'plot': [0.1, 0.2, 1.0, 0.1]}

        second = EnrichmentEntry("9916", "polysomal ribosome", 0.025)
        second.corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}
        second.attrs = {}

        enrichment = Enrichment("ranked parent-child", [first, second],
                                ["Cycles found..."],
                                ['bh_fdr', 'bonferroni'])

        buf = StringIO()
        EnrichmentWriter(buf).write(enrichment)

        # Two comment lines, the column header, one row per entry, then
        # the warning line.
        expected = "".join([
            "# ranked parent-child\r\n",
            "# 2 1\r\n",
            "id\tname\tp-value\tbh_fdr|bonferroni\tattributes\r\n",
            "9951\tstructure-specific DNA binding\t0.032301032301\t1.0|1.0\t{'plot': [0.1, 0.2, 1.0, 0.1]}\r\n",
            "9916\tpolysomal ribosome\t0.025\t1.0|1.0\t{}\r\n",
            "!\tCycles found...\r\n",
        ])
        self.assertEqual(expected, buf.getvalue())
 def test_fasta_out(self):
     """Writing the example FASTQ records as FASTA matches the reference."""
     output = StringIO()
     fastq_records = SeqIO.parse("Quality/example.fastq", "fastq")
     SeqIO.write(fastq_records, output, "fasta")
     with open("Quality/example.fasta") as reference:
         reference_text = reference.read()
     self.assertEqual(output.getvalue(), reference_text)
Beispiel #10
0
    def test_widget(self):
        """Try widget derived functionality.

        Exercises dumpProperties, getProperties and setProperties on a
        ChromosomeSegment.
        """
        test_widget = BasicChromosome.ChromosomeSegment()

        expected_string = "chr_percent = 0.25"

        # Capture what dumpProperties sends to sys.stdout by swapping in a
        # buffer.  The finally clause puts the real stdout back even when
        # dumpProperties raises; without it the replacement would stay in
        # place for everything that runs afterwards.
        save_stdout = sys.stdout
        new_stdout = StringIO()
        sys.stdout = new_stdout
        try:
            test_widget.dumpProperties()
        finally:
            sys.stdout = save_stdout
        properties = new_stdout.getvalue()

        self.assertIn(
            expected_string, properties,
            "Unexpected results from dumpProperties: \n %s" % properties)

        properties = test_widget.getProperties()
        self.assertEqual(
            properties["label_size"], 6,
            "Unexpected results from getProperties: %s" % properties)

        test_widget.setProperties({"start_x_position": 12})
        self.assertEqual(
            test_widget.start_x_position, 12,
            "setProperties doesn't seem to work right: %s" %
            test_widget.start_x_position)
    def test_qual_negative(self):
        """Check QUAL negative scores mapped to PHRED zero"""
        data = """>1117_10_107_F3
23 31 -1 -1 -1 29 -1 -1 20 32 -1 18 25 7 -1 6 -1 -1 -1 30 -1 20 13 7 -1 -1 21 30 -1 24 -1 22 -1 -1 22 14 -1 12 26 21 -1 5 -1 -1 -1 20 -1 -1 12 28 
>1117_10_146_F3
20 33 -1 -1 -1 29 -1 -1 28 28 -1 7 16 5 -1 30 -1 -1 -1 14 -1 4 13 4 -1 -1 11 13 -1 5 -1 7 -1 -1 10 16 -1 4 12 15 -1 8 -1 -1 -1 16 -1 -1 10 4 
>1117_10_1017_F3
33 33 -1 -1 -1 27 -1 -1 17 16 -1 28 24 11 -1 6 -1 -1 -1 29 -1 8 29 24 -1 -1 8 8 -1 20 -1 13 -1 -1 8 13 -1 28 10 24 -1 10 -1 -1 -1 4 -1 -1 7 6 
>1117_11_136_F3
16 22 -1 -1 -1 33 -1 -1 30 27 -1 27 28 32 -1 29 -1 -1 -1 27 -1 18 9 6 -1 -1 23 16 -1 26 -1 5 7 -1 22 7 -1 18 14 8 -1 8 -1 -1 -1 11 -1 -1 4 24"""
        h = StringIO(data)
        h2 = StringIO()
        self.assertEqual(4, SeqIO.convert(h, "qual", h2, "fastq"))
        self.assertEqual(h2.getvalue(), """@1117_10_107_F3
??????????????????????????????????????????????????
+
8@!!!>!!5A!3:(!'!!!?!5.(!!6?!9!7!!7/!-;6!&!!!5!!-=
@1117_10_146_F3
??????????????????????????????????????????????????
+
5B!!!>!!==!(1&!?!!!/!%.%!!,.!&!(!!+1!%-0!)!!!1!!+%
@1117_10_1017_F3
??????????????????????????????????????????????????
+
BB!!!<!!21!=9,!'!!!>!)>9!!))!5!.!!).!=+9!+!!!%!!('
@1117_11_136_F3
??????????????????????????????????????????????????
+
17!!!B!!?<!<=A!>!!!<!3*'!!81!;!&(!7(!3/)!)!!!,!!%9
""")
 def test_fastq_2000(self):
     """Round-trip one FASTQ record holding an upper case 2000bp read."""
     title = "id descr goes here"
     sequence = "ACGT" * 500
     quality = "!@a~" * 500
     data = "@%s\n%s\n+\n%s\n" % (title, sequence, quality)
     output = StringIO()
     records = SeqIO.parse(StringIO(data), "fastq")
     written = SeqIO.write(records, output, "fastq")
     self.assertEqual(1, written)
     self.assertEqual(data, output.getvalue())
Beispiel #13
0
    def test_write(self):
        terms_to_write = [
            OntologyTerm("GO:0009628", "response to abiotic stimulus",
                         {"is_a": ["GO:0050896"]}),
            OntologyTerm("GO:0022627", "cytosolic small ribosomal subunit",
                         {"is_a": ["GO:0015935", "GO:0044445"]})
        ]
        f = StringIO()
        writer = OboWriter(f, version="1.2")
        writer.write(terms_to_write)

        expected_output = """format-version:1.2

[Term]
id: GO:0009628
name: response to abiotic stimulus
is_a: GO:0050896

[Term]
id: GO:0022627
name: cytosolic small ribosomal subunit
is_a: GO:0015935
is_a: GO:0044445
"""
        self.assertEqual(expected_output, f.getvalue())
 def test_fastq_1000(self):
     """Parse and rewrite a mixed case 1000bp FASTQ read unchanged."""
     name = "id descr goes here"
     bases = "ACGTNncgta" * 100
     scores = "abcd!!efgh" * 100
     data = "@%s\n%s\n+\n%s\n" % (name, bases, scores)
     out_handle = StringIO()
     parsed = SeqIO.parse(StringIO(data), "fastq")
     count = SeqIO.write(parsed, out_handle, "fastq")
     self.assertEqual(1, count)
     self.assertEqual(data, out_handle.getvalue())
Beispiel #15
0
    def test_qual_negative(self):
        """Check QUAL negative scores mapped to PHRED zero"""
        data = """>1117_10_107_F3
23 31 -1 -1 -1 29 -1 -1 20 32 -1 18 25 7 -1 6 -1 -1 -1 30 -1 20 13 7 -1 -1 21 30 -1 24 -1 22 -1 -1 22 14 -1 12 26 21 -1 5 -1 -1 -1 20 -1 -1 12 28 
>1117_10_146_F3
20 33 -1 -1 -1 29 -1 -1 28 28 -1 7 16 5 -1 30 -1 -1 -1 14 -1 4 13 4 -1 -1 11 13 -1 5 -1 7 -1 -1 10 16 -1 4 12 15 -1 8 -1 -1 -1 16 -1 -1 10 4 
>1117_10_1017_F3
33 33 -1 -1 -1 27 -1 -1 17 16 -1 28 24 11 -1 6 -1 -1 -1 29 -1 8 29 24 -1 -1 8 8 -1 20 -1 13 -1 -1 8 13 -1 28 10 24 -1 10 -1 -1 -1 4 -1 -1 7 6 
>1117_11_136_F3
16 22 -1 -1 -1 33 -1 -1 30 27 -1 27 28 32 -1 29 -1 -1 -1 27 -1 18 9 6 -1 -1 23 16 -1 26 -1 5 7 -1 22 7 -1 18 14 8 -1 8 -1 -1 -1 11 -1 -1 4 24"""
        h = StringIO(data)
        h2 = StringIO()
        self.assertEqual(4, SeqIO.convert(h, "qual", h2, "fastq"))
        self.assertEqual(
            h2.getvalue(), """@1117_10_107_F3
??????????????????????????????????????????????????
+
8@!!!>!!5A!3:(!'!!!?!5.(!!6?!9!7!!7/!-;6!&!!!5!!-=
@1117_10_146_F3
??????????????????????????????????????????????????
+
5B!!!>!!==!(1&!?!!!/!%.%!!,.!&!(!!+1!%-0!)!!!1!!+%
@1117_10_1017_F3
??????????????????????????????????????????????????
+
BB!!!<!!21!=9,!'!!!>!)>9!!))!5!.!!).!=+9!+!!!%!!('
@1117_11_136_F3
??????????????????????????????????????????????????
+
17!!!B!!?<!<=A!>!!!<!3*'!!81!;!&(!7(!3/)!)!!!,!!%9
""")
Beispiel #16
0
 def test_write_species(self):
     """Test writing species from annotation tags.

     Reads a SwissProt record, checks its organism and NCBI taxon
     annotations, writes it as SeqXML and requires a <species> element
     carrying both values in the output.
     """
     organism = "Homo sapiens (Human)"
     record = SeqIO.read("SwissProt/sp016", "swiss")
     self.assertEqual(record.annotations["organism"], organism)
     self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
     handle = StringIO()
     SeqIO.write(record, handle, "seqxml")
     # getvalue() returns the whole buffer regardless of position, so no
     # rewind is needed before reading it.
     output = handle.getvalue()
     self.assertIn(organism, output)
     self.assertIn("9606", output)
     # Attribute order and the empty-element spelling differ between XML
     # writers, so every equivalent form of the tag is accepted.
     accepted_tags = (
         # Good, but don't get this (do we?)
         '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
         # Not as concise, but fine (seen on C Python)
         '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
         # Jython uses a different order
         '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
         # This would be fine too, but don't get this (do we?)
         '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
     )
     if not any(tag in output for tag in accepted_tags):
         # Report a test failure rather than an error from a raised
         # ValueError.
         self.fail("Missing expected <species> tag: %r" % output)
Beispiel #17
0
 def test_fasta_out(self):
     """FASTA written from the example FASTQ file equals the stored FASTA."""
     fasta_buffer = StringIO()
     parsed = SeqIO.parse("Quality/example.fastq", "fastq")
     SeqIO.write(parsed, fasta_buffer, "fasta")
     with open("Quality/example.fasta") as expected_file:
         expected_text = expected_file.read()
     self.assertEqual(fasta_buffer.getvalue(), expected_text)
 def test_fastq_2000(self):
     """Parse and rewrite an upper case 2000bp FASTQ read unchanged."""
     name = "id descr goes here"
     bases = "ACGT" * 500
     scores = "!@a~" * 500
     data = "@%s\n%s\n+\n%s\n" % (name, bases, scores)
     out_handle = StringIO()
     parsed = SeqIO.parse(StringIO(data), "fastq")
     count = SeqIO.write(parsed, out_handle, "fastq")
     self.assertEqual(1, count)
     self.assertEqual(data, out_handle.getvalue())
 def test_write_species(self):
     """Test writing species from annotation tags.

     Reads a SwissProt record, checks its organism and NCBI taxon
     annotations, writes it as SeqXML and requires a <species> element
     carrying both values in the output.
     """
     organism = "Homo sapiens (Human)"
     record = SeqIO.read("SwissProt/sp016", "swiss")
     self.assertEqual(record.annotations["organism"], organism)
     self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
     handle = StringIO()
     SeqIO.write(record, handle, "seqxml")
     # getvalue() returns the whole buffer regardless of position, so no
     # rewind is needed before reading it.
     output = handle.getvalue()
     # assertIn shows the searched text on failure, unlike assertTrue.
     self.assertIn(organism, output)
     self.assertIn("9606", output)
     # Attribute order and the empty-element spelling differ between XML
     # writers, so every equivalent form of the tag is accepted.
     accepted_tags = (
         # Good, but don't get this (do we?)
         '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
         # Not as concise, but fine (seen on C Python)
         '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
         # Jython uses a different order
         '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
         # This would be fine too, but don't get this (do we?)
         '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
     )
     if not any(tag in output for tag in accepted_tags):
         # Report a test failure rather than an error from a raised
         # ValueError.
         self.fail("Missing expected <species> tag: %r" % output)
Beispiel #20
0
 def test_format_branch_length(self):
     """Custom format string for Newick branch length serialization."""
     tree = Phylo.read(StringIO("A:0.1;"), "newick")
     mem_file = StringIO()
     Phylo.write(tree, mem_file, "newick", format_branch_length="%.0e")
     # Py2.5 compat: Windows with Py2.5- represents this as 1e-001;
     # on all other platforms it's 1e-01
     # assertIn is used so that a failure shows the text actually written
     # instead of a bare "False is not true".
     self.assertIn(mem_file.getvalue().strip(), ["A:1e-01;", "A:1e-001;"])
 def test_format_phylip(self):
     """format_phylip writes a size line followed by one row per name."""
     matrix = DistanceMatrix(self.names, self.matrix)
     buf = StringIO()
     matrix.format_phylip(buf)
     rows = buf.getvalue().splitlines()
     # One leading line plus one line per matrix entry.
     self.assertEqual(len(rows), len(matrix) + 1)
     header = rows[0]
     self.assertTrue(header.endswith(str(len(matrix))))
     body = rows[1:]
     for name, row in zip(self.names, body):
         self.assertTrue(row.startswith(name))
 def test_fastq_rna(self):
     """Round-trip ambiguous RNA through FASTQ, upper then lower case."""
     title = "id descr goes here"
     # One quality character per letter, starting at chr(33).
     quality = "".join(
         chr(33 + q) for q in range(len(ambiguous_rna_letters)))
     # First in upper case, then in lower case...
     for sequence in (ambiguous_rna_letters.upper(),
                      ambiguous_rna_letters.lower()):
         data = "@%s\n%s\n+\n%s\n" % (title, sequence, quality)
         output = StringIO()
         records = SeqIO.parse(StringIO(data), "fastq")
         self.assertEqual(1, SeqIO.write(records, output, "fastq"))
         self.assertEqual(data, output.getvalue())
 def test_fastq_rna(self):
     """Parse and rewrite ambiguous RNA FASTQ in upper and lower case."""
     name = "id descr goes here"
     # One quality character per letter, starting at chr(33).
     scores = "".join(
         chr(33 + q) for q in range(len(ambiguous_rna_letters)))
     # First in upper case, then in lower case...
     for bases in (ambiguous_rna_letters.upper(),
                   ambiguous_rna_letters.lower()):
         data = "@%s\n%s\n+\n%s\n" % (name, bases, scores)
         out_handle = StringIO()
         parsed = SeqIO.parse(StringIO(data), "fastq")
         self.assertEqual(1, SeqIO.write(parsed, out_handle, "fastq"))
         self.assertEqual(data, out_handle.getvalue())
Beispiel #24
0
 def test_format_branch_length(self):
     """Custom format string for Newick branch length serialization."""
     tree = Phylo.read(StringIO('A:0.1;'), 'newick')
     mem_file = StringIO()
     Phylo.write(tree, mem_file, 'newick', format_branch_length='%.0e')
     # Py2.5 compat: Windows with Py2.5- represents this as 1e-001;
     # on all other platforms it's 1e-01
     # assertIn is used so that a failure shows the text actually written
     # instead of a bare "False is not true".
     self.assertIn(
         mem_file.getvalue().strip(), ['A:1e-01;', 'A:1e-001;'])
 def test_format_phylip(self):
     """Check the line count, size header and row labels of format_phylip."""
     distances = DistanceMatrix(self.names, self.matrix)
     out_handle = StringIO()
     distances.format_phylip(out_handle)
     written = out_handle.getvalue().splitlines()
     # One leading line plus one line per matrix entry.
     self.assertEqual(len(written), len(distances) + 1)
     first_line = written[0]
     self.assertTrue(first_line.endswith(str(len(distances))))
     data_lines = written[1:]
     for label, data_line in zip(self.names, data_lines):
         self.assertTrue(data_line.startswith(label))
Beispiel #26
0
    def testParse(self):
        """Build a Scop from the cla/des/hie test files and write it back.

        The written cla lines are compared pairwise with the input through
        _compare_cla_lines; the written des and hie text must equal the
        input exactly.  Domain and node lookups are then checked, including
        lookups that are expected to return None.
        """
        # Each file gets its own with block so it is closed as soon as it
        # has been read, whether or not reading succeeds.
        with open("./SCOP/dir.cla.scop.txt_test") as f:
            cla = f.read()
        with open("./SCOP/dir.des.scop.txt_test") as f:
            des = f.read()
        with open("./SCOP/dir.hie.scop.txt_test") as f:
            hie = f.read()

        scop = Scop(StringIO(cla), StringIO(des), StringIO(hie))

        cla_out = StringIO()
        scop.write_cla(cla_out)
        lines = zip(cla.rstrip().split('\n'),
                    cla_out.getvalue().rstrip().split('\n'))
        for expected_line, line in lines:
            self.assertTrue(self._compare_cla_lines(expected_line, line))

        des_out = StringIO()
        scop.write_des(des_out)
        self.assertEqual(des_out.getvalue(), des)

        hie_out = StringIO()
        scop.write_hie(hie_out)
        self.assertEqual(hie_out.getvalue(), hie)

        domain = scop.getDomainBySid("d1hbia_")
        self.assertEqual(domain.sunid, 14996)

        domains = scop.getDomains()
        self.assertEqual(len(domains), 14)
        self.assertEqual(domains[4].sunid, 14988)

        # Unknown identifiers must give None; assertIsNone tests identity
        # rather than equality.
        dom = scop.getNodeBySunid(-111)
        self.assertIsNone(dom)
        dom = scop.getDomainBySid("no such domain")
        self.assertIsNone(dom)
Beispiel #27
0
    def testParse(self):
        """Build a Scop from the cla/des/hie test files and write it back.

        The written cla lines are compared pairwise with the input through
        _compare_cla_lines; the written des and hie text must equal the
        input exactly.  Domain and node lookups are then checked, including
        lookups that are expected to return None.
        """
        # Each file gets its own with block so it is closed as soon as it
        # has been read, whether or not reading succeeds.
        with open("./SCOP/dir.cla.scop.txt_test") as f:
            cla = f.read()
        with open("./SCOP/dir.des.scop.txt_test") as f:
            des = f.read()
        with open("./SCOP/dir.hie.scop.txt_test") as f:
            hie = f.read()

        scop = Scop(StringIO(cla), StringIO(des), StringIO(hie))

        cla_out = StringIO()
        scop.write_cla(cla_out)
        lines = zip(cla.rstrip().split('\n'),
                    cla_out.getvalue().rstrip().split('\n'))
        for expected_line, line in lines:
            self.assertTrue(self._compare_cla_lines(expected_line, line))

        des_out = StringIO()
        scop.write_des(des_out)
        self.assertEqual(des_out.getvalue(), des)

        hie_out = StringIO()
        scop.write_hie(hie_out)
        self.assertEqual(hie_out.getvalue(), hie)

        domain = scop.getDomainBySid("d1hbia_")
        self.assertEqual(domain.sunid, 14996)

        domains = scop.getDomains()
        self.assertEqual(len(domains), 14)
        self.assertEqual(domains[4].sunid, 14988)

        # Unknown identifiers must give None; assertIsNone tests identity
        # rather than equality.
        dom = scop.getNodeBySunid(-111)
        self.assertIsNone(dom)
        dom = scop.getDomainBySid("no such domain")
        self.assertIsNone(dom)
    def test_write(self):
        correct_output_a = """graph [
  directed 1
  node [
    id 0
    label "1"
    a 1
  ]
  node [
    id 1
    label "2"
  ]
  edge [
    source 0
    target 1
    x "x"
  ]
  edge [
    source 1
    target 0
    label "zzzz"
  ]
]"""
        correct_output_b = """graph [
  directed 1
  node [
    id 0
    label "2"
  ]
  node [
    id 1
    label "1"
    a 1
  ]
  edge [
    source 1
    target 0
    x "x"
  ]
  edge [
    source 0
    target 1
    label "zzzz"
  ]
]"""
        out = StringIO()
        writer = GmlWriter(out)
        graph = DiGraph()
        graph.add_node(1, {'a': 1})
        graph.add_edge(1, 2, {'x': 'x'})
        graph.add_edge(2, 1, "zzzz")
        writer.write(graph)
        self.assertIn(out.getvalue(), set([correct_output_a,
                                           correct_output_b]))
Beispiel #29
0
 def test_locus_line_topogoly(self):
     """The written first line keeps the topology and matches the input."""
     gb_path = 'GenBank/DS830848.gb'
     record = SeqIO.read(gb_path, 'genbank')
     self.assertEqual(record.annotations['topology'], 'linear')
     buf = StringIO()
     SeqIO.write([record], buf, 'genbank')
     written_first = buf.getvalue().split('\n', 1)[0]
     self.assertIn('linear', written_first)
     # Compare with the first line of the file the record came from.
     with open(gb_path, 'r') as source:
         original_first = source.readline().strip()
     self.assertEqual(written_first, original_first)
 def test_locus_line_topogoly(self):
     """Writing the record reproduces the input's first line and topology."""
     genbank_file = 'GenBank/DS830848.gb'
     record = SeqIO.read(genbank_file, 'genbank')
     self.assertEqual(record.annotations['topology'], 'linear')
     written = StringIO()
     SeqIO.write([record], written, 'genbank')
     new_first_line = written.getvalue().split('\n', 1)[0]
     self.assertIn('linear', new_first_line)
     # Compare with the first line of the file the record came from.
     with open(genbank_file, 'r') as original:
         old_first_line = original.readline().strip()
     self.assertEqual(new_first_line, old_first_line)
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Round-trip a file through SeqIO.write and compare with SeqIO.convert.

    The parsed records are written in out_format, re-parsed and passed to
    compare_records together with the originals.  The text written that
    way must also equal the text produced by SeqIO.convert.  When
    truncation_expected(out_format) is true, UserWarning is silenced
    around both the write and the convert call.
    """
    originals = list(SeqIO.parse(in_filename, in_format, alphabet))
    lossy = truncation_expected(out_format)

    # Route 1: write the parsed records.
    written = StringIO()
    with warnings.catch_warnings():
        if lossy:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.write(originals, written, out_format)

    # Rewind, load the output back and check it agrees.
    written.seek(0)
    reloaded = list(SeqIO.parse(written, out_format, alphabet))
    compare_records(originals, reloaded, lossy)

    # Route 2: the one-step convert function.
    converted = StringIO()
    with warnings.catch_warnings():
        if lossy:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.convert(in_filename, in_format, converted, out_format, alphabet)

    # Comparing raw text is simpler and stricter than re-parsing.
    assert written.getvalue() == converted.getvalue()
    def test_write(self):
        correct_output_a = """graph [
  directed 1
  node [
    id 0
    label "1"
    a 1
  ]
  node [
    id 1
    label "2"
  ]
  edge [
    source 0
    target 1
    x "x"
  ]
  edge [
    source 1
    target 0
    label "zzzz"
  ]
]"""
        correct_output_b =  """graph [
  directed 1
  node [
    id 0
    label "2"
  ]
  node [
    id 1
    label "1"
    a 1
  ]
  edge [
    source 1
    target 0
    x "x"
  ]
  edge [
    source 0
    target 1
    label "zzzz"
  ]
]"""
        out = StringIO()
        writer = GmlWriter(out)
        graph = DiGraph()
        graph.add_node(1, {'a' : 1 })
        graph.add_edge(1, 2, {'x' : 'x'})
        graph.add_edge(2, 1, "zzzz")
        writer.write(graph)
        self.assertIn(out.getvalue(), set([correct_output_a, correct_output_b]))
Beispiel #33
0
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Round-trip a sequence file and cross-check SeqIO.convert.

    Parses ``in_filename``, writes the records as ``out_format`` to an
    in-memory handle, parses that text again and passes both record lists to
    ``compare_records``.  Finally ``SeqIO.convert`` is run on the same input
    and must produce exactly the same text.

    ``BiopythonWarning`` is ignored while writing if
    ``truncation_expected(out_format)`` is true.
    """
    parsed = list(SeqIO.parse(in_filename, in_format, alphabet))

    # First pass: explicit parse + write.
    written = StringIO()
    expect_truncation = truncation_expected(out_format)
    with warnings.catch_warnings():
        if expect_truncation:
            warnings.simplefilter("ignore", BiopythonWarning)
        SeqIO.write(parsed, written, out_format)

    # Read our own output back and compare it with the input records.
    written.seek(0)
    reparsed = list(SeqIO.parse(written, out_format, alphabet))
    compare_records(parsed, reparsed, expect_truncation)

    # Second pass: the convert shortcut.
    converted = StringIO()
    with warnings.catch_warnings():
        if expect_truncation:
            warnings.simplefilter("ignore", BiopythonWarning)
        SeqIO.convert(in_filename, in_format, converted, out_format, alphabet)

    # Comparing the text is simpler and stricter than re-parsing it.
    assert written.getvalue() == converted.getvalue()
Beispiel #34
0
    def __format__(self, format_spec):
        """Return the alignment as a string in the requested file format.

        This is the hook behind the built-in format() function (added in
        Python 2.6/3.0).  format_spec should be a lower case string which
        Bio.AlignIO supports as an output file format.  With an empty
        format_spec the result is simply str(self).
        See also the alignment's format() method.
        """
        if not format_spec:
            # Follow python convention and default to using __str__
            return str(self)
        from Bio._py3k import StringIO
        from Bio import AlignIO
        buffer = StringIO()
        AlignIO.write([self], buffer, format_spec)
        return buffer.getvalue()
    def __format__(self, format_spec):
        """Format the alignment using a Bio.AlignIO output format name.

        Called by the python format() function (available since Python
        2.6/3.0).  A non-empty format_spec must be a lower case string
        accepted by Bio.AlignIO as an output file format; the alignment is
        written in that format and the resulting text returned.  An empty
        format_spec falls back to str(self).
        See also the alignment's format() method.
        """
        if not format_spec:
            # Python convention: no format specification means plain __str__.
            return str(self)

        from Bio._py3k import StringIO
        from Bio import AlignIO

        output = StringIO()
        AlignIO.write([self], output, format_spec)
        text = output.getvalue()
        return text
Beispiel #36
0
    def __format__(self, format_spec):
        """Return the tree serialized in the requested file format.

        This is what the ``format`` built-in function (added in Python
        2.6/3.0) calls.

        :param format_spec: a lower-case string supported by `Bio.Phylo.write`
            as an output file format.  If empty, ``str(self)`` is returned.
        """
        if not format_spec:
            # Follow python convention and default to using __str__
            return str(self)
        from Bio._py3k import StringIO
        from Bio.Phylo import _io
        buffer = StringIO()
        _io.write([self], buffer, format_spec)
        return buffer.getvalue()
Beispiel #37
0
    def __format__(self, format_spec):
        """Produce the tree as text in a `Bio.Phylo` output format.

        Hook for the ``format`` built-in function (Python 2.6/3.0 and later).

        :param format_spec: a lower-case string supported by `Bio.Phylo.write`
            as an output file format.  An empty value gives ``str(self)``.
        """
        if not format_spec:
            # Python convention: no format specification means plain __str__.
            return str(self)

        from Bio._py3k import StringIO
        from Bio.Phylo import _io

        output = StringIO()
        _io.write([self], output, format_spec)
        serialized = output.getvalue()
        return serialized
Beispiel #38
0
    def write_to_string(self, output='PS', dpi=72):
        """ write_to_string(self, output='PS', dpi=72)

            o output        String indicating output format, one of PS, PDF,
                            SVG, JPG, BMP, GIF, PNG or TIFF (as specified
                            for the write method).

            o dpi           Resolution (dots per inch) for bitmap formats.

            Return the completed drawing as a string in a prescribed format
        """
        #This used to call the ReportLab drawToString method, which itself
        #only wraps drawToFile around a cStringIO or StringIO handle.
        #Going through our own write method with a StringIO handle keeps
        #all the file format specific code in one place.
        from Bio._py3k import StringIO
        buffer = StringIO()
        self.write(buffer, output, dpi)
        drawing_text = buffer.getvalue()
        return drawing_text
Beispiel #39
0
    def write_to_string(self, output='PS', dpi=72):
        """ write_to_string(self, output='PS', dpi=72)

            Return the completed drawing as a string in a prescribed format

            o output        String indicating output format, one of PS, PDF,
                            SVG, JPG, BMP, GIF, PNG or TIFF (as specified
                            for the write method).

            o dpi           Resolution (dots per inch) for bitmap formats.
        """
        #Historically this called the ReportLab drawToString method, but that
        #just uses a cStringIO or StringIO handle with the drawToFile method.
        #Writing to a StringIO handle through our own write method means the
        #complicated file format specific code lives in one place only.
        from Bio._py3k import StringIO
        in_memory = StringIO()
        self.write(in_memory, output, dpi)
        return in_memory.getvalue()
    def test_write(self):
        """Write an Enrichment with two entries and check the exact text."""
        with_attrs = EnrichmentEntry("9951", "structure-specific DNA binding", 0.032301032301)
        with_attrs.corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}
        with_attrs.attrs = {'plot': [0.1, 0.2, 1.0, 0.1]}

        without_attrs = EnrichmentEntry("9916", "polysomal ribosome", 0.025)
        without_attrs.corrections = {'bh_fdr': 1.0, 'bonferroni': 1.0}
        without_attrs.attrs = {}

        enrichment = Enrichment("ranked parent-child",
                                [with_attrs, without_attrs],
                                ["Cycles found..."],
                                ['bh_fdr', 'bonferroni'])

        # Expected text: two '#' header lines, a column header, one line per
        # entry, then a '!' line carrying the "Cycles found..." string.
        expected = ("# ranked parent-child\r\n"
                    "# 2 1\r\n"
                    "id\tname\tp-value\tbh_fdr|bonferroni\tattributes\r\n"
                    "9951\tstructure-specific DNA binding\t0.032301032301\t1.0|1.0\t{'plot': [0.1, 0.2, 1.0, 0.1]}\r\n"
                    "9916\tpolysomal ribosome\t0.025\t1.0|1.0\t{}\r\n"
                    "!\tCycles found...\r\n")

        sink = StringIO()
        EnrichmentWriter(sink).write(enrichment)
        self.assertEqual(expected, sink.getvalue())
    def test_write(self):
        terms_to_write = [OntologyTerm("GO:0009628", "response to abiotic stimulus",
                                 {"is_a" : ["GO:0050896"]}),
                          OntologyTerm("GO:0022627", "cytosolic small ribosomal subunit",
                                 {"is_a" : ["GO:0015935", "GO:0044445"]})]
        f = StringIO()
        writer = OboWriter(f, version = "1.2")
        writer.write(terms_to_write)
        
        expected_output = """format-version:1.2

[Term]
id: GO:0009628
name: response to abiotic stimulus
is_a: GO:0050896

[Term]
id: GO:0022627
name: cytosolic small ribosomal subunit
is_a: GO:0015935
is_a: GO:0044445
"""
        self.assertEqual(expected_output, f.getvalue())     
Beispiel #42
0
    def test_read_write_clustal(self):
        """Read a Clustal alignment and check its AlignInfo summary statistics.

        Loads Clustalw/opuntia.aln (relative to the current directory) as a
        gapped unambiguous DNA alignment, verifies the seven records, and
        then checks the dumb consensus, replacement dictionary, position
        specific score matrices, information content and the text produced
        by AlignInfo.print_info_content against fixed expected values.
        """
        path = os.path.join(os.getcwd(), "Clustalw", "opuntia.aln")
        alignment = AlignIO.read(path, "clustal", alphabet=Alphabet.Gapped(IUPAC.unambiguous_dna))
        # Seven records are expected; check each description and sequence.
        self.assertEqual(len(alignment), 7)
        seq_record = alignment[0]
        self.assertEqual(seq_record.description, "gi|6273285|gb|AF191659.1|AF191")
        # The first sequence is compared against a Seq object, the rest
        # against plain strings.
        self.assertEqual(seq_record.seq, Seq("TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA"))
        seq_record = alignment[1]
        self.assertEqual(seq_record.description, "gi|6273284|gb|AF191658.1|AF191")
        self.assertEqual(seq_record.seq, "TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA")
        seq_record = alignment[2]
        self.assertEqual(seq_record.description, "gi|6273287|gb|AF191661.1|AF191")
        self.assertEqual(seq_record.seq, "TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA")
        seq_record = alignment[3]
        self.assertEqual(seq_record.description, "gi|6273286|gb|AF191660.1|AF191")
        self.assertEqual(seq_record.seq, "TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA")
        seq_record = alignment[4]
        self.assertEqual(seq_record.description, "gi|6273290|gb|AF191664.1|AF191")
        self.assertEqual(seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA")
        seq_record = alignment[5]
        self.assertEqual(seq_record.description, "gi|6273289|gb|AF191663.1|AF191")
        self.assertEqual(seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA")
        seq_record = alignment[6]
        self.assertEqual(seq_record.description, "gi|6273291|gb|AF191665.1|AF191")
        self.assertEqual(seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA")
        self.assertEqual(alignment.get_alignment_length(), 156)
        # Everything below works on the summary object for this alignment.
        align_info = AlignInfo.SummaryInfo(alignment)
        # The expected consensus contains two X characters.
        consensus = align_info.dumb_consensus()
        self.assertIsInstance(consensus, Seq)
        self.assertEqual(consensus, "TATACATTAAAGXAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTXCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA")
        # Replacement dictionary, called with ["N"]: 16 entries are
        # expected, one per ordered pair of the letters A, C, G and T.
        dictionary = align_info.replacement_dictionary(["N"])
        self.assertEqual(len(dictionary), 16)
        self.assertAlmostEqual(dictionary[("A", "A")], 1395.0, places=1)
        self.assertAlmostEqual(dictionary[("A", "C")], 3.0, places=1)
        self.assertAlmostEqual(dictionary[("A", "G")], 13.0, places=1)
        self.assertAlmostEqual(dictionary[("A", "T")], 6.0, places=1)
        self.assertAlmostEqual(dictionary[("C", "A")], 3.0, places=1)
        self.assertAlmostEqual(dictionary[("C", "C")], 271.0, places=1)
        self.assertAlmostEqual(dictionary[("C", "G")], 0, places=1)
        self.assertAlmostEqual(dictionary[("C", "T")], 16.0, places=1)
        self.assertAlmostEqual(dictionary[("G", "A")], 5.0, places=1)
        self.assertAlmostEqual(dictionary[("G", "C")], 0, places=1)
        self.assertAlmostEqual(dictionary[("G", "G")], 480.0, places=1)
        self.assertAlmostEqual(dictionary[("G", "T")], 0, places=1)
        self.assertAlmostEqual(dictionary[("T", "A")], 6.0, places=1)
        self.assertAlmostEqual(dictionary[("T", "C")], 12.0, places=1)
        self.assertAlmostEqual(dictionary[("T", "G")], 0, places=1)
        self.assertAlmostEqual(dictionary[("T", "T")], 874.0, places=1)
        # Position specific score matrix with the consensus passed in
        # explicitly; the expected row labels follow the consensus letters
        # (including the two X rows).
        matrix = align_info.pos_specific_score_matrix(consensus, ["N"])
        self.assertEqual(str(matrix), """\
    A   C   G   T
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  1.0 0.0 0.0 6.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
X  4.0 0.0 3.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
C  0.0 7.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
C  0.0 7.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 4.0
A  4.0 0.0 0.0 0.0
T  0.0 0.0 0.0 3.0
A  3.0 0.0 0.0 0.0
T  0.0 0.0 0.0 1.0
A  1.0 0.0 0.0 0.0
T  0.0 0.0 0.0 1.0
A  1.0 0.0 0.0 0.0
T  0.0 0.0 0.0 1.0
A  1.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
C  1.0 6.0 0.0 0.0
A  6.0 0.0 0.0 1.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
X  0.0 3.0 0.0 4.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 2.0 0.0 5.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
T  0.0 1.0 0.0 6.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
T  0.0 0.0 0.0 7.0
G  1.0 0.0 6.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
""")

        # Same call without an explicit sequence argument; the expected row
        # labels again include the two X rows.
        matrix = align_info.pos_specific_score_matrix(chars_to_ignore=["N"])
        self.assertEqual(str(matrix), """\
    A   C   G   T
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  1.0 0.0 0.0 6.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
X  4.0 0.0 3.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
C  0.0 7.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
C  0.0 7.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 4.0
A  4.0 0.0 0.0 0.0
T  0.0 0.0 0.0 3.0
A  3.0 0.0 0.0 0.0
T  0.0 0.0 0.0 1.0
A  1.0 0.0 0.0 0.0
T  0.0 0.0 0.0 1.0
A  1.0 0.0 0.0 0.0
T  0.0 0.0 0.0 1.0
A  1.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
C  1.0 6.0 0.0 0.0
A  6.0 0.0 0.0 1.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
X  0.0 3.0 0.0 4.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 2.0 0.0 5.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
T  0.0 1.0 0.0 6.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
T  0.0 0.0 0.0 7.0
G  1.0 0.0 6.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
""")

        # Now pass the second record's sequence instead of the consensus;
        # the expected row labels follow that sequence, so its gap columns
        # appear as '-' rows.
        second_seq = alignment[1].seq
        matrix = align_info.pos_specific_score_matrix(second_seq, ["N"])
        self.assertEqual(str(matrix), """\
    A   C   G   T
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  1.0 0.0 0.0 6.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  4.0 0.0 3.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
C  0.0 7.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
G  0.0 0.0 7.0 0.0
C  0.0 7.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 4.0
A  4.0 0.0 0.0 0.0
-  0.0 0.0 0.0 3.0
-  3.0 0.0 0.0 0.0
-  0.0 0.0 0.0 1.0
-  1.0 0.0 0.0 0.0
-  0.0 0.0 0.0 1.0
-  1.0 0.0 0.0 0.0
-  0.0 0.0 0.0 1.0
-  1.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
C  1.0 6.0 0.0 0.0
A  6.0 0.0 0.0 1.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 3.0 0.0 4.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 2.0 0.0 5.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
C  0.0 7.0 0.0 0.0
T  0.0 1.0 0.0 6.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
T  0.0 0.0 0.0 7.0
G  1.0 0.0 6.0 0.0
T  0.0 0.0 0.0 7.0
A  7.0 0.0 0.0 0.0
C  0.0 7.0 0.0 0.0
C  0.0 7.0 0.0 0.0
A  7.0 0.0 0.0 0.0
G  0.0 0.0 7.0 0.0
A  7.0 0.0 0.0 0.0
""")
        # Information content: first with the positional arguments 5 and 50
        # (presumably a start/end column range - confirm against
        # SummaryInfo.information_content), then with defaults.
        value = align_info.information_content(5, 50, chars_to_ignore=["N"])
        self.assertAlmostEqual(value, 88.42, places=2)
        value = align_info.information_content(chars_to_ignore=["N"])
        self.assertAlmostEqual(value, 287.55, places=2)
        # Supplying an explicit table with equal frequencies (0.25 each) is
        # expected to give the same total as the default call above.
        e_freq = {"G": 0.25, "C": 0.25, "A": 0.25, "T": 0.25}
        e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ,
                                           IUPAC.unambiguous_dna)
        value = align_info.information_content(e_freq_table=e_freq_table,
                                               chars_to_ignore=["N"])
        self.assertAlmostEqual(value, 287.55, places=2)
        # Spot-check two columns and their entries in ic_vector: column 1 is
        # all A, column 7 has one A among the Ts.
        self.assertEqual(align_info.get_column(1), "AAAAAAA")
        self.assertAlmostEqual(align_info.ic_vector[1], 2.00, places=2)
        self.assertEqual(align_info.get_column(7), "TTTATTT")
        self.assertAlmostEqual(align_info.ic_vector[7], 1.41, places=2)
        # Finally check the full per-column report written to a handle.
        handle = StringIO()
        AlignInfo.print_info_content(align_info, fout=handle)
        self.assertEqual(handle.getvalue(), """\
0 T 2.000
1 A 2.000
2 T 2.000
3 A 2.000
4 C 2.000
5 A 2.000
6 T 2.000
7 T 1.408
8 A 2.000
9 A 2.000
10 A 2.000
11 G 2.000
12 A 1.015
13 A 2.000
14 G 2.000
15 G 2.000
16 G 2.000
17 G 2.000
18 G 2.000
19 A 2.000
20 T 2.000
21 G 2.000
22 C 2.000
23 G 2.000
24 G 2.000
25 A 2.000
26 T 2.000
27 A 2.000
28 A 2.000
29 A 2.000
30 T 2.000
31 G 2.000
32 G 2.000
33 A 2.000
34 A 2.000
35 A 2.000
36 G 2.000
37 G 2.000
38 C 2.000
39 G 2.000
40 A 2.000
41 A 2.000
42 A 2.000
43 G 2.000
44 A 2.000
45 A 2.000
46 A 2.000
47 G 2.000
48 A 2.000
49 A 2.000
50 T 2.000
51 A 2.000
52 T 2.000
53 A 2.000
54 T 2.000
55 A 2.000
56 - 0.682
57 - 0.682
58 - 0.333
59 - 0.333
60 - -0.115
61 - -0.115
62 - -0.115
63 - -0.115
64 - -0.115
65 - -0.115
66 A 2.000
67 T 2.000
68 A 2.000
69 T 2.000
70 A 2.000
71 T 2.000
72 T 2.000
73 T 2.000
74 C 1.408
75 A 1.408
76 A 2.000
77 A 2.000
78 T 2.000
79 T 2.000
80 T 1.015
81 C 2.000
82 C 2.000
83 T 2.000
84 T 2.000
85 A 2.000
86 T 2.000
87 A 2.000
88 T 2.000
89 A 2.000
90 C 1.137
91 C 2.000
92 C 2.000
93 A 2.000
94 A 2.000
95 A 2.000
96 T 2.000
97 A 2.000
98 T 2.000
99 A 2.000
100 A 2.000
101 A 2.000
102 A 2.000
103 A 2.000
104 T 2.000
105 A 2.000
106 T 2.000
107 C 2.000
108 T 2.000
109 A 2.000
110 A 2.000
111 T 2.000
112 A 2.000
113 A 2.000
114 A 2.000
115 T 2.000
116 T 2.000
117 A 2.000
118 G 2.000
119 A 2.000
120 T 2.000
121 G 2.000
122 A 2.000
123 A 2.000
124 T 2.000
125 A 2.000
126 T 2.000
127 C 2.000
128 A 2.000
129 A 2.000
130 A 2.000
131 G 2.000
132 A 2.000
133 A 2.000
134 T 2.000
135 C 2.000
136 C 1.408
137 A 2.000
138 T 2.000
139 T 2.000
140 G 2.000
141 A 2.000
142 T 2.000
143 T 2.000
144 T 2.000
145 A 2.000
146 G 2.000
147 T 2.000
148 G 1.408
149 T 2.000
150 A 2.000
151 C 2.000
152 C 2.000
153 A 2.000
154 G 2.000
155 A 2.000
""")
Beispiel #43
0
def check_simple_write_read(alignments, indent=" "):
    """Write the alignments in several formats and check they read back.

    For every format in ``test_write_read_align_with_seq_count`` the
    alignments are written to an in-memory handle with ``AlignIO.write``.
    The text is then parsed again, with ``seq_count`` when all alignments
    have the same number of records, and without it when the format is also
    in ``test_write_read_alignment_formats``.  Each result is compared with
    the input using ``simple_alignment_comparison``.

    A ``ValueError`` while writing is reported and the format is skipped.
    A ``ValueError`` while reading our own output is re-raised as a
    ``ValueError`` whose message also contains the raw text and the
    alignments that were written.

    :param alignments: non-empty list of alignment objects.
    :param indent: prefix for the progress messages that are printed.
    """
    def _parse_back(handle, fmt, **parse_kwargs):
        # Rewind the handle and parse what was just written to it.
        handle.flush()
        handle.seek(0)
        try:
            return list(AlignIO.parse(handle=handle, format=fmt,
                                      **parse_kwargs))
        except ValueError as e:
            # This is BAD.  We can't read our own output.
            # I want to see the output when called from the test harness,
            # run_tests.py (which can be funny about new lines on Windows)
            handle.seek(0)
            # Show the alignments we wrote.  Nothing has been parsed at this
            # point, so there is no "read back" list to display; referring
            # to one here used to mask this error with an UnboundLocalError.
            raise ValueError("%s\n\n%s\n\n%s"
                             % (str(e), repr(handle.read()), repr(alignments)))

    # print(indent+"Checking we can write and then read back these alignments")
    for fmt in test_write_read_align_with_seq_count:
        # seq_count is only usable if every alignment has the same number
        # of records; otherwise it is left as None.
        records_per_alignment = len(alignments[0])
        for a in alignments:
            if records_per_alignment != len(a):
                records_per_alignment = None
                break
        # Can we expect this format to work?
        if not records_per_alignment \
                and fmt not in test_write_read_alignment_formats:
            continue

        print(indent + "Checking can write/read as '%s' format" % fmt)

        # Going to write to a handle...
        handle = StringIO()

        try:
            c = AlignIO.write(alignments, handle=handle, format=fmt)
        except ValueError as e:
            # This is often expected to happen, for example when we try and
            # write sequences of different lengths to an alignment file.
            print(indent + "Failed: %s" % str(e))
            # Carry on to the next format:
            continue
        assert c == len(alignments)

        # First, try with the seq_count
        if records_per_alignment:
            alignments2 = _parse_back(handle, fmt,
                                      seq_count=records_per_alignment)
            simple_alignment_comparison(alignments, alignments2, fmt)

        if fmt in test_write_read_alignment_formats:
            # Don't need the seq_count
            alignments2 = _parse_back(handle, fmt)
            simple_alignment_comparison(alignments, alignments2, fmt)

        if len(alignments) > 1:
            # Try writing just one Alignment (not a list)
            handle = StringIO()
            SeqIO.write(alignments[0], handle, fmt)
            assert handle.getvalue() == alignments[0].format(fmt)
Beispiel #44
0
def check_simple_write_read(alignments, indent=" "):
    """Check the alignments survive a write/read round trip in each format.

    Loops over ``test_write_read_align_with_seq_count``.  The alignments are
    written to an in-memory handle with ``AlignIO.write`` and parsed back:
    once with ``seq_count`` (only if all alignments hold the same number of
    records) and once without it (only for formats that are also listed in
    ``test_write_read_alignment_formats``).  Every parsed result is checked
    against the input with ``simple_alignment_comparison``.

    A ``ValueError`` while writing is printed and the format is skipped.
    A ``ValueError`` while reading our own output is re-raised as a
    ``ValueError`` whose message also contains the raw text and the
    alignments that were written.

    :param alignments: non-empty list of alignment objects.
    :param indent: prefix for the progress messages that are printed.
    """
    # print(indent+"Checking we can write and then read back these alignments")
    for fmt in test_write_read_align_with_seq_count:
        # seq_count is only usable if every alignment has the same number
        # of records; otherwise it is left as None.
        records_per_alignment = len(alignments[0])
        for a in alignments:
            if records_per_alignment != len(a):
                records_per_alignment = None
                break
        # Can we expect this format to work?
        if not records_per_alignment \
                and fmt not in test_write_read_alignment_formats:
            continue

        print(indent + "Checking can write/read as '%s' format" % fmt)

        # Going to write to a handle...
        handle = StringIO()

        try:
            c = AlignIO.write(alignments, handle=handle, format=fmt)
            assert c == len(alignments)
        except ValueError as e:
            # This is often expected to happen, for example when we try and
            # write sequences of different lengths to an alignment file.
            print(indent + "Failed: %s" % str(e))
            # Carry on to the next format:
            continue

        # First, try with the seq_count
        if records_per_alignment:
            handle.flush()
            handle.seek(0)
            try:
                alignments2 = list(
                    AlignIO.parse(handle=handle,
                                  format=fmt,
                                  seq_count=records_per_alignment))
            except ValueError as e:
                # This is BAD.  We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                # Report the alignments we wrote: the parse failed, so no
                # read-back list exists yet (naming it here used to raise
                # UnboundLocalError and hide this error).
                raise ValueError(
                    "%s\n\n%s\n\n%s" %
                    (str(e), repr(handle.read()), repr(alignments)))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if fmt in test_write_read_alignment_formats:
            # Don't need the seq_count
            handle.flush()
            handle.seek(0)
            try:
                alignments2 = list(AlignIO.parse(handle=handle, format=fmt))
            except ValueError as e:
                # This is BAD.  We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                # As above: show what was written, not a stale or unbound
                # read-back list.
                raise ValueError(
                    "%s\n\n%s\n\n%s" %
                    (str(e), repr(handle.read()), repr(alignments)))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if len(alignments) > 1:
            # Try writing just the first alignment on its own
            handle = StringIO()
            AlignIO.write(alignments[0:1], handle, fmt)
            assert handle.getvalue() == alignments[0].format(fmt)
Beispiel #45
0
def CifAtomIterator(handle):
    """Return SeqRecord objects for each chain in an mmCIF file.

    The sequences are derived from the 3D structure (the atom records), as
    parsed by Bio.PDB's MMCIFParser, not from any sequence listing in the
    file.

    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM entries
    are converted to "X" in the sequence.

    The following chain specific information is placed in the annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    Where amino acids are missing from the structure, as indicated by residue
    numbering, the sequence is filled in with 'X' characters to match the size
    of the missing region, and  None is included as the corresponding entry in
    the list record.annotations["residues"].

    The structure ID is taken from the file's ``_entry.id`` field.  If that
    field is absent a BiopythonParserWarning is issued and '????' is used
    instead.  An empty input raises ValueError.

    This function uses the Bio.PDB module to do most of the hard work.

    This gets called internally via Bio.SeqIO for the atom based interpretation
    of the mmCIF file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # Only import parser when needed, to avoid/delay NumPy dependency in SeqIO
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.MMCIF2Dict import MMCIF2Dict

    # PdbAtomIterator can peek at the first line (via UndoHandle) to get the
    # PDB ID.  For mmCIF the ID lives in the _entry.id field and, AFAIK, the
    # format does not constrain the order of fields, so the whole file has to
    # go through MMCIF2Dict.  The handle's contents are copied into a StringIO
    # first so that it can be read twice: once by MMCIF2Dict and once by
    # MMCIFParser.
    contents = StringIO()
    shutil.copyfileobj(handle, contents)

    # Nothing was copied, i.e. the file is empty
    if not contents.getvalue():
        raise ValueError("Empty file.")

    contents.seek(0)
    fields = MMCIF2Dict(contents)
    if "_entry.id" not in fields:
        warnings.warn("Could not find the '_entry.id' field; can't determine "
                      "PDB ID.", BiopythonParserWarning)
        structure_id = '????'
    else:
        structure_id = fields["_entry.id"]
        # The value may come back as a list; use its first item
        if isinstance(structure_id, list):
            structure_id = structure_id[0]

    # Rewind so the structure parser sees the file from the start
    contents.seek(0)
    structure = MMCIFParser().get_structure(structure_id, contents)
    for chain_record in AtomIterator(structure_id, structure):
        yield chain_record
Beispiel #46
0
def CifAtomIterator(handle):
    """Return SeqRecord objects for each chain in an mmCIF file.

    The sequences are derived from the 3D structure (the atom records) rather
    than from any sequence listing in the file's header.

    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM entries
    are converted to "X" in the sequence.

    In addition to information from the header (which is the same for all
    records), the following chain specific information is placed in the
    annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    Where amino acids are missing from the structure, as indicated by residue
    numbering, the sequence is filled in with 'X' characters to match the size
    of the missing region, and  None is included as the corresponding entry in
    the list record.annotations["residues"].

    This function uses the Bio.PDB module to do most of the hard work. The
    annotation information could be improved but this extra parsing should be
    done in parse_pdb_header, not this module.

    Arguments:
     - handle - file-like object opened in text mode. Its whole content is
       copied into an in-memory StringIO buffer, because the data has to be
       read twice (once to find the ID, once to build the structure).

    This is a generator, so nothing is read or validated until the first
    record is requested. At that point:

     - ValueError("Empty file.") is raised if the handle yields no data.
     - BiopythonParserWarning is issued if the '_entry.id' field is absent,
       in which case the placeholder ID "????" is used instead.

    This gets called internally via Bio.SeqIO for the atom based interpretation
    of the mmCIF file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # The PdbAtomIterator uses UndoHandle to peek at the first line and get the
    # PDB ID. The equivalent for mmCIF is the _entry.id field. AFAIK, the mmCIF
    # format does not constrain the order of fields, so we need to parse the
    # entire file using MMCIF2Dict. We copy the contents of the handle into a
    # StringIO buffer first, so that both MMCIF2Dict and MMCIFParser can
    # consume the handle.
    buffer = StringIO()
    shutil.copyfileobj(handle, buffer)

    # Reject empty input up front: this needs nothing from Bio.PDB, so there is
    # no reason to trigger the (NumPy dependent) import just to report it.
    if not buffer.getvalue():
        raise ValueError("Empty file.")

    # Only import parser when needed, to avoid/delay NumPy dependency in SeqIO
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.MMCIF2Dict import MMCIF2Dict

    # First pass over the buffer: locate the structure identifier.
    buffer.seek(0)
    mmcif_dict = MMCIF2Dict(buffer)
    if "_entry.id" in mmcif_dict:
        pdb_id = mmcif_dict["_entry.id"]
        # The value may come back wrapped in a list; use its first entry.
        if isinstance(pdb_id, list):
            pdb_id = pdb_id[0]
    else:
        warnings.warn("Could not find the '_entry.id' field; can't determine "
                      "PDB ID.", BiopythonParserWarning)
        pdb_id = "????"

    # Second pass: rewind and build the structure the records are derived from.
    buffer.seek(0)
    struct = MMCIFParser().get_structure(pdb_id, buffer)
    for record in AtomIterator(pdb_id, struct):
        yield record