예제 #1
0
 def test_genbank_to_fasta(self):
     """Convert a GenBank file to FASTA via TogoWS and compare sequences.

     The local file is parsed directly as the reference, then streamed
     through TogoWS.convert; the converted record must carry the same
     sequence string.
     """
     path = "GenBank/NC_005816.gb"
     reference = SeqIO.read(path, "gb")
     with open(path) as stream:
         fasta_handle = TogoWS.convert(stream, "genbank", "fasta")
         converted = SeqIO.read(fasta_handle, "fasta")
     self.assertEqual(str(reference.seq), str(converted.seq))
예제 #2
0
 def test_databases(self):
     """Verify TogoWS lists every database name this test requires.

     Additional databases are tolerated; any required name that is absent
     is listed (sorted) in the failure message.
     """
     available = set(TogoWS._get_entry_dbs())
     # NOTE: "embl" was commented out of the required names in the original.
     required = {
         "nuccore", "nucest", "nucgss", "nucleotide", "protein", "gene",
         "homologene", "snp", "mesh", "pubmed",
         "uniprot", "uniparc", "uniref100", "uniref90", "uniref50",
         "ddbj", "dad", "pdb", "compound", "drug", "enzyme", "genes",
         "glycan", "orthology", "reaction", "module", "pathway",
     }
     missing = sorted(required.difference(available))
     self.assertTrue(required.issubset(available), "Missing DB: %s" % ", ".join(missing))
예제 #3
0
 def test_ncbi_protein(self):
     """Verify the entry fields TogoWS reports for "ncbi-protein".

     The reported fields must include every required name; on failure the
     full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("ncbi-protein"))
     required = {
         "entry_id", "length", "strand", "moltype", "linearity",
         "division", "date", "definition", "accession", "accessions",
         "version", "versions", "acc_version", "gi", "keywords",
         "organism", "common_name", "taxonomy", "comment", "seq",
     }
     self.assertTrue(required.issubset(fields), fields)
예제 #4
0
 def test_pubmed_16381885_title(self):
     """Fetch the "title" field of PubMed entry 16381885 via TogoWS.entry.

     Surrounding whitespace is stripped before comparing with the
     expected article title.
     """
     handle = TogoWS.entry("pubmed", "16381885", field="title")
     try:
         data = handle.read().strip()
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data,
          'From genomics to chemical genomics: new developments in KEGG.')
예제 #5
0
 def test_genbank_gff3(self):
     """Fetch nucleotide entry X52960 in "gff" format via TogoWS.entry.

     The response must start with a GFF3 version line followed by an
     X52960 row whose source column is "Genbank"; on failure the whole
     response is shown as the message.
     """
     # Note - Using manual URL with genbank instead of nucleotide works
     handle = TogoWS.entry("nucleotide", "X52960", format="gff")
     try:
         data = handle.read()
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertTrue(data.startswith("##gff-version 3\nX52960\tGenbank\t"), data)
예제 #6
0
 def test_pubmed_16381885_and_19850725(self):
     """Fetch two PubMed entries in one TogoWS.entry call.

     The response is parsed with Medline.parse; exactly two records are
     expected, and the title ("TI") and author list ("AU") of each are
     checked in the order the identifiers were requested.
     """
     handle = TogoWS.entry("pubmed", "16381885,19850725")
     try:
         # Consume the parser fully before the handle is closed.
         records = list(Medline.parse(handle))
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     self.assertEqual(len(records), 2)
     self.assertEqual(records[0]["TI"], "From genomics to chemical genomics: new developments in KEGG.")
     self.assertEqual(
         records[0]["AU"],
         [
             "Kanehisa M",
             "Goto S",
             "Hattori M",
             "Aoki-Kinoshita KF",
             "Itoh M",
             "Kawashima S",
             "Katayama T",
             "Araki M",
             "Hirakawa M",
         ],
     )
     self.assertEqual(
         records[1]["TI"],
         "DDBJ launches a new archive database with analytical tools " + "for next-generation sequence data.",
     )
     self.assertEqual(
         records[1]["AU"],
         ["Kaminuma E", "Mashima J", "Kodama Y", "Gojobori T", "Ogasawara O", "Okubo K", "Takagi T", "Nakamura Y"],
     )
예제 #7
0
 def test_ddbj_genbank(self):
     """Fetch DDBJ entry X52960 via TogoWS.entry and parse it as GenBank.

     Checks the record id, name, length and sequence SEGUID checksum.
     """
     handle = TogoWS.entry("ddbj", "X52960")  # Returns "genbank" format
     try:
         record = SeqIO.read(handle, "gb")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     self.assertEqual(record.id, "X52960.1")
     self.assertEqual(record.name, "X52960")
     self.assertEqual(len(record), 248)
     self.assertEqual(seguid(record.seq), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
예제 #8
0
 def test_ddbj_fasta(self):
     """Fetch DDBJ entry X52960 as FASTA via TogoWS.entry.

     The accession must appear in the record id and name; the length and
     sequence SEGUID checksum are also checked.
     """
     handle = TogoWS.entry("ddbj", "X52960", "fasta")
     try:
         record = SeqIO.read(handle, "fasta")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     # assertIn reports both operands on failure.
     self.assertIn("X52960", record.id)
     self.assertIn("X52960", record.name)
     self.assertEqual(len(record), 248)
     self.assertEqual(seguid(record.seq), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
예제 #9
0
 def test_nucleotide_fasta(self):
     """Fetch nucleotide entry 6273291 as FASTA via TogoWS.entry.

     The identifier must appear in the record id and name; the length and
     sequence SEGUID checksum are also checked.
     """
     handle = TogoWS.entry("nucleotide", "6273291", "fasta")
     try:
         record = SeqIO.read(handle, "fasta")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     # assertIn reports both operands on failure.
     self.assertIn("6273291", record.id)
     self.assertIn("6273291", record.name)
     self.assertEqual(len(record), 902)
     self.assertEqual(seguid(record.seq), "bLhlq4mEFJOoS9PieOx4nhGnjAQ")
예제 #10
0
    def check(self, database, search_term, expected_matches=(), limit=None):
        """Cross-check TogoWS.search_count against TogoWS.search_iter.

        Args:
            database: database name passed through to TogoWS.
            search_term: query string passed through to TogoWS.
            expected_matches: identifiers that must appear in the results.
            limit: optional cap handed to TogoWS.search_iter.

        Raises:
            ValueError: if both expected_matches and limit are given, or if
                the reported count is smaller than len(expected_matches).

        Searches reporting more than 5000 hits are skipped (with a printed
        note) unless a limit is supplied.
        """
        if expected_matches and limit:
            raise ValueError("Bad test - TogoWS makes no promises about order")

        total = TogoWS.search_count(database, search_term)
        if expected_matches and total < len(expected_matches):
            raise ValueError("Only %i matches, expected at least %i" % (total, len(expected_matches)))
        if total > 5000 and not limit:
            print("%i results, skipping" % total)
            return

        # With a limit, at most that many hits should come back.
        wanted = min(total, limit) if limit else total

        # Iteration should find everything... unless a limit is used
        hits = list(TogoWS.search_iter(database, search_term, limit))
        self.assertEqual(wanted, len(hits))
        for match in expected_matches:
            self.assertTrue(match in hits, "Expected %s in results but not" % match)
예제 #11
0
 def test_embl_AM905444(self):
     """Fetch EMBL entry AM905444 via TogoWS.entry and parse it as EMBL.

     The accession must appear in the record id and name, "porin" in the
     description; the length and sequence SEGUID checksum are also checked.
     """
     handle = TogoWS.entry("embl", "AM905444")
     try:
         record = SeqIO.read(handle, "embl")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     # assertIn reports both operands on failure.
     self.assertIn("AM905444", record.id)
     self.assertIn("AM905444", record.name)
     self.assertIn("porin", record.description)
     self.assertEqual(len(record), 1164)
     self.assertEqual(seguid(record.seq), "G0HtLpwF7i4FXUaUjDUPTjok79c")
예제 #12
0
 def test_pubmed_16381885_au(self):
     """Fetch the "au" field of PubMed entry 16381885 via TogoWS.entry.

     The field gives one name per line (newline separated, no dots), so
     the stripped response is split on newlines before comparison.
     """
     handle = TogoWS.entry("pubmed", "16381885", field="au")
     try:
         data = handle.read().strip().split("\n")
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, ['Kanehisa M', 'Goto S', 'Hattori M',
                             'Aoki-Kinoshita KF', 'Itoh M',
                             'Kawashima S', 'Katayama T', 'Araki M',
                             'Hirakawa M'])
예제 #13
0
 def test_pubmed_16381885_authors(self):
     """Fetch the "authors" field of PubMed entry 16381885 via TogoWS.entry.

     The field gives tab-separated names, so the stripped response is
     split on tabs before comparison.
     """
     handle = TogoWS.entry("pubmed", "16381885", field="authors")
     try:
         data = handle.read().strip().split("\t")
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, ['Kanehisa, M.', 'Goto, S.', 'Hattori, M.',
                             'Aoki-Kinoshita, K. F.', 'Itoh, M.',
                             'Kawashima, S.', 'Katayama, T.', 'Araki, M.',
                             'Hirakawa, M.'])
예제 #14
0
 def test_protein_fasta(self):
     """Fetch protein entry 16130152 as FASTA via TogoWS.entry.

     The identifier must appear in the record id and name, "porin protein"
     in the description; the length and sequence SEGUID checksum are also
     checked.
     """
     handle = TogoWS.entry("protein", "16130152", "fasta")
     try:
         record = SeqIO.read(handle, "fasta")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     # assertIn reports both operands on failure.
     self.assertIn("16130152", record.id)
     self.assertIn("16130152", record.name)
     self.assertIn("porin protein", record.description)
     self.assertEqual(len(record), 367)
     self.assertEqual(seguid(record.seq), "fCjcjMFeGIrilHAn6h+yju267lg")
예제 #15
0
 def test_ddbj(self):
     """Verify the entry fields TogoWS reports for the "ddbj" database.

     The reported fields must include every required name; on failure the
     full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("ddbj"))
     required = {
         'entry_id', 'length', 'strand', 'moltype', 'linearity',
         'division', 'date', 'definition', 'accession', 'accessions',
         'version', 'versions', 'acc_version', 'gi', 'keywords',
         'organism', 'common_name', 'taxonomy', 'comment', 'seq',
     }
     self.assertTrue(required.issubset(fields), fields)
예제 #16
0
 def test_nucleotide_fasta(self):
     """Fetch nucleotide entry 6273291 as FASTA via TogoWS.entry.

     Checks the accession.version in the record id and name, the length
     and the sequence SEGUID checksum.
     """
     handle = TogoWS.entry("nucleotide", "6273291", "fasta")
     try:
         record = SeqIO.read(handle, "fasta")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     # NCBI is phasing out GI numbers, so the GI 6273291 is no longer
     # expected in the id/name; check the accession.version instead.
     self.assertIn("AF191665.1", record.id)
     self.assertIn("AF191665.1", record.name)
     self.assertEqual(len(record), 902)
     self.assertEqual(seguid(record.seq), "bLhlq4mEFJOoS9PieOx4nhGnjAQ")
예제 #17
0
 def test_pubmed_16381885(self):
     """Fetch PubMed entry 16381885 via TogoWS.entry as Medline plain text.

     The response is parsed with Medline.read and the title ("TI") and
     author list ("AU") are checked.
     """
     handle = TogoWS.entry("pubmed", "16381885")
     try:
         data = Medline.read(handle)
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     self.assertEqual(data["TI"],
          'From genomics to chemical genomics: new developments in KEGG.')
     self.assertEqual(data["AU"], ['Kanehisa M', 'Goto S', 'Hattori M',
                                   'Aoki-Kinoshita KF', 'Itoh M',
                                   'Kawashima S', 'Katayama T', 'Araki M',
                                   'Hirakawa M'])
예제 #18
0
 def test_databases(self):
     """Verify TogoWS lists every database name this test requires.

     Additional databases are tolerated; on failure the full set of
     reported databases is shown as the message.
     """
     dbs = set(TogoWS._get_entry_dbs())
     # assertTrue replaces the deprecated assert_ alias, which no longer
     # exists in Python 3.12+.
     self.assertTrue(dbs.issuperset(['nuccore', 'nucest', 'nucgss',
                                     'nucleotide', 'protein', 'gene',
                                     'omim', 'homologene', 'snp',
                                     'mesh', 'pubmed', 'embl',
                                     'uniprot', 'uniparc', 'uniref100',
                                     'uniref90', 'uniref50', 'ddbj',
                                     'dad', 'pdb', 'compound', 'drug',
                                     'enzyme', 'genes', 'glycan',
                                     'orthology', 'reaction', 'module',
                                     'pathway']), dbs)
예제 #19
0
 def test_protein_fasta(self):
     """Fetch protein entry 16130152 as FASTA via TogoWS.entry.

     Checks the accession.version in the record id and name, "porin
     protein" in the description, the length and the sequence SEGUID
     checksum.
     """
     handle = TogoWS.entry("protein", "16130152", "fasta")
     try:
         record = SeqIO.read(handle, "fasta")
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     # NCBI is phasing out GI numbers, so the GI 16130152 is no longer
     # expected in the id/name; check the accession.version instead.
     self.assertIn("NP_416719.1", record.id)
     self.assertIn("NP_416719.1", record.name)
     self.assertIn("porin protein", record.description)
     self.assertEqual(len(record), 367)
     self.assertEqual(seguid(record.seq), "fCjcjMFeGIrilHAn6h+yju267lg")
예제 #20
0
 def test_databases(self):
     """Verify TogoWS lists every database name this test requires.

     Additional databases are tolerated; any required name that is absent
     is listed (sorted) in the failure message.
     """
     available = set(TogoWS._get_entry_dbs())
     # NOTE: 'embl' was commented out of the required names in the original.
     required = {
         'nuccore', 'nucest', 'nucgss', 'nucleotide', 'protein', 'gene',
         'homologene', 'snp', 'mesh', 'pubmed',
         'uniprot', 'uniparc', 'uniref100', 'uniref90', 'uniref50',
         'ddbj', 'dad', 'pdb', 'compound', 'drug', 'enzyme', 'genes',
         'glycan', 'orthology', 'reaction', 'module', 'pathway',
     }
     missing = sorted(required.difference(available))
     self.assertTrue(required.issubset(available),
                     "Missing DB: %s" % ", ".join(missing))
예제 #21
0
    def test_uniprot_swiss(self):
        """Fetch two UniProt entries in one TogoWS.entry call.

        The identifiers are passed as a list and the response is parsed in
        "swiss" format; exactly two records are expected. For each, the id,
        name, length and sequence SEGUID checksum are checked.
        """
        # Returns "swiss" format:
        handle = TogoWS.entry("uniprot", ["A1AG1_HUMAN", "A1AG1_MOUSE"])
        try:
            # Unpacking consumes the parser and fails unless exactly two
            # records are present.
            record1, record2 = SeqIO.parse(handle, "swiss")
        finally:
            # Close the handle even if parsing or unpacking raises.
            handle.close()

        self.assertEqual(record1.id, "P02763")
        self.assertEqual(record1.name, "A1AG1_HUMAN")
        self.assertEqual(len(record1), 201)
        self.assertEqual(seguid(record1.seq), "LHDJJ6oC7gUXo8CC7Xn6EUeA8Gk")

        self.assertEqual(record2.id, "Q60590")
        self.assertEqual(record2.name, "A1AG1_MOUSE")
        self.assertEqual(len(record2), 207)
        self.assertEqual(seguid(record2.seq), "FGcj+RFQhP2gRusCmwPFty5PJT0")
예제 #22
0
 def test_pubmed_16381885_and_19850725_authors(self):
     """Fetch the "authors" field for two PubMed entries in one call.

     The stripped response is split on newlines and must yield exactly
     two lines, one per entry; each line is then split on tabs and
     compared with the expected author names.
     """
     handle = TogoWS.entry("pubmed", "16381885,19850725", field="authors")
     try:
         # Split on single newlines only; blank lines are not removed.
         names = handle.read().strip().split("\n")
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(2, len(names))
     names1, names2 = names
     self.assertEqual(names1.split("\t"),
                      ['Kanehisa, M.', 'Goto, S.', 'Hattori, M.',
                       'Aoki-Kinoshita, K. F.', 'Itoh, M.',
                       'Kawashima, S.', 'Katayama, T.',
                       'Araki, M.', 'Hirakawa, M.'])
     self.assertEqual(names2.split("\t"),
                      ['Kaminuma, E.', 'Mashima, J.', 'Kodama, Y.',
                       'Gojobori, T.', 'Ogasawara, O.', 'Okubo, K.',
                       'Takagi, T.', 'Nakamura, Y.'])
예제 #23
0
 def test_pubmed_16381885_and_19850725(self):
     """Fetch two PubMed entries in one TogoWS.entry call.

     The response is parsed with Medline.parse; exactly two records are
     expected, and the title ("TI") and author list ("AU") of each are
     checked in the order the identifiers were requested.
     """
     handle = TogoWS.entry("pubmed", "16381885,19850725")
     try:
         # Consume the parser fully before the handle is closed.
         records = list(Medline.parse(handle))
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     self.assertEqual(len(records), 2)
     self.assertEqual(records[0]["TI"],
          'From genomics to chemical genomics: new developments in KEGG.')
     self.assertEqual(records[0]["AU"], ['Kanehisa M', 'Goto S',
                                         'Hattori M', 'Aoki-Kinoshita KF',
                                         'Itoh M', 'Kawashima S',
                                         'Katayama T', 'Araki M',
                                         'Hirakawa M'])
     self.assertEqual(records[1]["TI"],
          'DDBJ launches a new archive database with analytical tools ' +
          'for next-generation sequence data.')
     self.assertEqual(records[1]["AU"], ['Kaminuma E', 'Mashima J',
                                         'Kodama Y', 'Gojobori T',
                                         'Ogasawara O', 'Okubo K',
                                         'Takagi T', 'Nakamura Y'])
예제 #24
0
 def test_pubmed_16381885(self):
     """Fetch PubMed entry 16381885 via TogoWS.entry as Medline plain text.

     The response is parsed with Medline.read and the title ("TI") and
     author list ("AU") are checked.
     """
     handle = TogoWS.entry("pubmed", "16381885")
     try:
         data = Medline.read(handle)
     finally:
         # Close the handle even if parsing raises.
         handle.close()
     self.assertEqual(data["TI"], "From genomics to chemical genomics: new developments in KEGG.")
     self.assertEqual(
         data["AU"],
         [
             "Kanehisa M",
             "Goto S",
             "Hattori M",
             "Aoki-Kinoshita KF",
             "Itoh M",
             "Kawashima S",
             "Katayama T",
             "Araki M",
             "Hirakawa M",
         ],
     )
예제 #25
0
 def test_databases(self):
     """Verify TogoWS lists every database name this test requires.

     Additional databases are tolerated; on failure the full set of
     reported databases is shown as the message.
     """
     dbs = set(TogoWS._get_entry_dbs())
     required = {
         "nuccore", "nucest", "nucgss", "nucleotide", "protein", "gene",
         "omim", "homologene", "snp", "mesh", "pubmed", "embl",
         "uniprot", "uniparc", "uniref100", "uniref90", "uniref50",
         "ddbj", "dad", "pdb", "compound", "drug", "enzyme", "genes",
         "glycan", "orthology", "reaction", "module", "pathway",
     }
     self.assertTrue(required.issubset(dbs), dbs)
예제 #26
0
 def test_genbank_to_embl(self):
     """Convert a GenBank file to EMBL via TogoWS and compare sequences.

     The local file is parsed directly as the reference, then streamed
     through TogoWS.convert; the converted record must carry the same
     sequence string.
     """
     filename = "GenBank/NC_005816.gb"
     old = SeqIO.read(filename, "gb")
     # Open the input in a with-block so it is always closed; it was
     # previously opened inline and never closed.
     with open(filename) as handle:
         new = SeqIO.read(TogoWS.convert(handle, "genbank", "embl"), "embl")
     self.assertEqual(str(old.seq), str(new.seq))
 def test_pdb(self):
     """Verify the entry fields TogoWS reports for the "pdb" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("pdb"))
     required = {"accession", "chains", "keywords", "models"}
     self.assertTrue(required.issubset(fields), fields)
 def test_uniprot(self):
     """Verify the entry fields TogoWS reports for the "uniprot" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("uniprot"))
     required = {"definition", "entry_id", "seq"}
     self.assertTrue(required.issubset(fields), fields)
예제 #29
0
 def test_nucleotide_genbank_length(self):
     """Fetch the "length" field of nucleotide entry X52960 via TogoWS.entry.

     The value is compared as text after stripping surrounding whitespace.
     """
     handle = TogoWS.entry("nucleotide", "X52960", field="length")
     try:
         data = handle.read().strip()  # ignore trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, "248")
예제 #30
0
 def test_embl_AM905444_seq(self):
     """Fetch the "seq" field of EMBL entry AM905444 via TogoWS.entry.

     The stripped sequence text is checked by its SEGUID checksum.
     """
     handle = TogoWS.entry("embl", "AM905444", field="seq")
     try:
         data = handle.read().strip()  # ignore any trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(seguid(data), "G0HtLpwF7i4FXUaUjDUPTjok79c")
예제 #31
0
 def test_embl_AM905444_definition(self):
     """Fetch the "definition" field of EMBL entry AM905444 via TogoWS.entry.

     The stripped text is compared with the expected definition line.
     """
     handle = TogoWS.entry("embl", "AM905444", field="definition")
     try:
         data = handle.read().strip()  # ignore any trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, "Herbaspirillum seropedicae locus tag HS193.0074 for porin")
예제 #32
0
 def test_nucleotide_genbank_seq(self):
     """Fetch the "seq" field of nucleotide entry X52960 via TogoWS.entry.

     The stripped sequence text is checked by its SEGUID checksum.
     """
     handle = TogoWS.entry("nucleotide", "X52960", field="seq")
     try:
         data = handle.read().strip()  # ignore trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(seguid(data), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
예제 #33
0
 def test_embl(self):
     """Verify the entry fields TogoWS reports for the "embl" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("embl"))
     required = {"definition", "entry_id", "seq"}
     self.assertTrue(required.issubset(fields), fields)
예제 #34
0
 def test_nucleotide_genbank_definition(self):
     """Fetch the "definition" field of nucleotide entry X52960.

     The stripped text returned by TogoWS.entry is compared with the
     expected definition line.
     """
     handle = TogoWS.entry("nucleotide", "X52960", field="definition")
     try:
         data = handle.read().strip()  # ignore trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, "Coleus blumei viroid 1 (CbVd) RNA.")
예제 #35
0
 def test_nucleotide_genbank_acc_version(self):
     """Fetch the "acc_version" field of nucleotide entry X52960.

     The stripped text returned by TogoWS.entry must be the versioned
     accession.
     """
     handle = TogoWS.entry("nucleotide", "X52960", field="acc_version")
     try:
         data = handle.read().strip()  # ignore trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, "X52960.1")
예제 #36
0
 def test_nucleotide_genbank_organism(self):
     """Fetch the "organism" field of nucleotide entry X52960.

     The stripped text returned by TogoWS.entry is compared with the
     expected organism name.
     """
     handle = TogoWS.entry("nucleotide", "X52960", field="organism")
     try:
         data = handle.read().strip()  # ignore trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, "Coleus blumei viroid 1")
예제 #37
0
 def test_uniprot(self):
     """Verify the entry fields TogoWS reports for the "uniprot" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("uniprot"))
     required = {"definition", "entry_id", "seq"}
     self.assertTrue(required.issubset(fields), fields)
예제 #38
0
 def test_pubmed(self):
     """Verify the entry fields TogoWS reports for the "pubmed" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("pubmed"))
     # assertTrue replaces the deprecated assert_ alias, which no longer
     # exists in Python 3.12+.
     self.assertTrue(fields.issuperset(['abstract', 'au', 'authors',
                                        'doi', 'mesh', 'so', 'ti',
                                        'title']), fields)
예제 #39
0
 def test_pubmed(self):
     """Verify the entry fields TogoWS reports for the "pubmed" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("pubmed"))
     required = {'abstract', 'au', 'authors', 'doi', 'mesh', 'so', 'title'}
     self.assertTrue(required.issubset(fields), fields)
예제 #40
0
 def test_nucleotide_genbank_organism(self):
     """Fetch the "organism" field of nucleotide entry X52960.

     The stripped text returned by TogoWS.entry is compared with the
     expected organism name.
     """
     handle = TogoWS.entry("nucleotide", "X52960", field="organism")
     try:
         data = handle.read().strip()  # ignore trailing \n
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertEqual(data, "Coleus blumei viroid 1")
예제 #41
0
 def test_ddbj_gff3(self):
     """Fetch DDBJ entry X52960 in "gff" format via TogoWS.entry.

     The response must start with a GFF3 version line followed by an
     X52960 row whose source column is "DDBJ"; on failure the whole
     response is shown as the message.
     """
     handle = TogoWS.entry("ddbj", "X52960", format="gff")
     try:
         data = handle.read()
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertTrue(data.startswith("##gff-version 3\nX52960\tDDBJ\t"), data)
예제 #42
0
 def test_embl_AM905444_gff3(self):
     """Fetch EMBL entry AM905444 in "gff" format via TogoWS.entry.

     The response must start with a GFF3 version line followed by an
     AM905444 row whose source column is "embl"; on failure the whole
     response is shown as the message.
     """
     handle = TogoWS.entry("embl", "AM905444", format="gff")
     try:
         data = handle.read()
     finally:
         # Close the handle even if the read raises.
         handle.close()
     # assertTrue replaces the deprecated assert_ alias, which no longer
     # exists in Python 3.12+.
     self.assertTrue(data.startswith("##gff-version 3\nAM905444\tembl\t"), data)
예제 #43
0
 def test_pdb(self):
     """Verify the entry fields TogoWS reports for the "pdb" database.

     On failure the full set of reported fields is shown as the message.
     """
     fields = set(TogoWS._get_entry_fields("pdb"))
     required = {"accession", "chains", "keywords", "models"}
     self.assertTrue(required.issubset(fields), fields)
예제 #44
0
 def test_ddbj_gff3(self):
     """Fetch DDBJ entry X52960 in "gff" format via TogoWS.entry.

     The response must start with a GFF3 version line followed by an
     X52960 row whose source column is "DDBJ"; on failure the whole
     response is shown as the message.
     """
     handle = TogoWS.entry("ddbj", "X52960", format="gff")
     try:
         data = handle.read()
     finally:
         # Close the handle even if the read raises.
         handle.close()
     self.assertTrue(data.startswith("##gff-version 3\nX52960\tDDBJ\t"), data)