def test_genbank_to_fasta(self):
    """Conversion of GenBank to FASTA.

    Parses the local GenBank file directly, then sends the same file
    through TogoWS.convert and checks both routes give the same sequence.
    """
    gb_path = "GenBank/NC_005816.gb"
    reference = SeqIO.read(gb_path, "gb")
    with open(gb_path) as gb_handle:
        fasta_handle = TogoWS.convert(gb_handle, "genbank", "fasta")
        converted = SeqIO.read(fasta_handle, "fasta")
    self.assertEqual(str(reference.seq), str(converted.seq))
def test_databases(self):
    """Check supported databases

    Every name in the expected set must be reported by
    TogoWS._get_entry_dbs(); names that are absent are listed in the
    failure message.
    """
    expected = {
        "nuccore",
        "nucest",
        "nucgss",
        "nucleotide",
        "protein",
        "gene",
        "homologene",
        "snp",
        "mesh",
        "pubmed",
        # 'embl',  (disabled entry, kept for reference)
        "uniprot",
        "uniparc",
        "uniref100",
        "uniref90",
        "uniref50",
        "ddbj",
        "dad",
        "pdb",
        "compound",
        "drug",
        "enzyme",
        "genes",
        "glycan",
        "orthology",
        "reaction",
        "module",
        "pathway",
    }
    available = set(TogoWS._get_entry_dbs())
    missing = sorted(expected.difference(available))
    self.assertTrue(available.issuperset(expected), "Missing DB: %s" % ", ".join(missing))
def test_ncbi_protein(self):
    """Check supported fields for NCBI protein database

    The field names reported for "ncbi-protein" must include every name
    listed below; on failure the reported set is shown.
    """
    required = [
        "entry_id",
        "length",
        "strand",
        "moltype",
        "linearity",
        "division",
        "date",
        "definition",
        "accession",
        "accessions",
        "version",
        "versions",
        "acc_version",
        "gi",
        "keywords",
        "organism",
        "common_name",
        "taxonomy",
        "comment",
        "seq",
    ]
    reported = set(TogoWS._get_entry_fields("ncbi-protein"))
    self.assertTrue(reported.issuperset(required), reported)
def test_pubmed_16381885_title(self):
    """Bio.TogoWS.entry("pubmed", "16381885", field="title")

    Requests only the title field of the entry and compares it (with
    surrounding whitespace stripped) against the expected text.
    """
    handle = TogoWS.entry("pubmed", "16381885", field="title")
    try:
        data = handle.read().strip()
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, 'From genomics to chemical genomics: new developments in KEGG.')
def test_genbank_gff3(self):
    """Bio.TogoWS.entry("nucleotide", "X52960", format="gff")

    Requests the entry in GFF format and checks the start of the
    returned text: the GFF3 version line followed by the first record line.
    """
    # Note - Using manual URL with genbank instead of nucleotide works
    handle = TogoWS.entry("nucleotide", "X52960", format="gff")
    try:
        data = handle.read()
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertTrue(data.startswith("##gff-version 3\nX52960\tGenbank\t"), data)
def test_pubmed_16381885_and_19850725(self):
    """Bio.TogoWS.entry("pubmed", "16381885,19850725")

    Requests two entries in one call, parses them with Medline.parse and
    checks the title (TI) and author list (AU) of each record.
    """
    handle = TogoWS.entry("pubmed", "16381885,19850725")
    try:
        records = list(Medline.parse(handle))
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    self.assertEqual(len(records), 2)
    self.assertEqual(records[0]["TI"], "From genomics to chemical genomics: new developments in KEGG.")
    self.assertEqual(
        records[0]["AU"],
        [
            "Kanehisa M",
            "Goto S",
            "Hattori M",
            "Aoki-Kinoshita KF",
            "Itoh M",
            "Kawashima S",
            "Katayama T",
            "Araki M",
            "Hirakawa M",
        ],
    )
    self.assertEqual(
        records[1]["TI"],
        "DDBJ launches a new archive database with analytical tools "
        + "for next-generation sequence data.",
    )
    self.assertEqual(
        records[1]["AU"],
        ["Kaminuma E", "Mashima J", "Kodama Y", "Gojobori T", "Ogasawara O", "Okubo K", "Takagi T", "Nakamura Y"],
    )
def test_ddbj_genbank(self):
    """Bio.TogoWS.entry("ddbj", "X52960")

    Parses the returned record as GenBank and checks its id, name,
    length and sequence checksum (SEGUID).
    """
    handle = TogoWS.entry("ddbj", "X52960")  # Returns "genbank" format
    try:
        record = SeqIO.read(handle, "gb")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    self.assertEqual(record.id, "X52960.1")
    self.assertEqual(record.name, "X52960")
    self.assertEqual(len(record), 248)
    self.assertEqual(seguid(record.seq), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
def test_ddbj_fasta(self):
    """Bio.TogoWS.entry("ddbj", "X52960", "fasta")

    Parses the returned record as FASTA and checks that the accession
    appears in its id and name, plus its length and sequence checksum.
    """
    handle = TogoWS.entry("ddbj", "X52960", "fasta")
    try:
        record = SeqIO.read(handle, "fasta")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    # assertIn reports both operands on failure, so no custom message needed.
    self.assertIn("X52960", record.id)
    self.assertIn("X52960", record.name)
    self.assertEqual(len(record), 248)
    self.assertEqual(seguid(record.seq), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
def test_nucleotide_fasta(self):
    """Bio.TogoWS.entry("nucleotide", "6273291", "fasta")

    Parses the returned record as FASTA and checks that the requested
    identifier appears in its id and name, plus its length and checksum.
    """
    handle = TogoWS.entry("nucleotide", "6273291", "fasta")
    try:
        record = SeqIO.read(handle, "fasta")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    # NOTE(review): NCBI has been phasing out GI numbers - confirm the GI
    # still appears in the FASTA header returned by the service.
    self.assertIn("6273291", record.id)
    self.assertIn("6273291", record.name)
    self.assertEqual(len(record), 902)
    self.assertEqual(seguid(record.seq), "bLhlq4mEFJOoS9PieOx4nhGnjAQ")
def check(self, database, search_term, expected_matches=(), limit=None):
    """Run a TogoWS search and verify the number (and content) of results.

    Arguments:
     - database - database name passed to TogoWS.search_count/search_iter
     - search_term - query string passed to the same calls
     - expected_matches - values that must be present in the results
     - limit - optional cap passed to TogoWS.search_iter

    Raises ValueError if both expected_matches and limit are given, or if
    the reported count is smaller than the number of expected matches.
    Searches reporting more than 5000 results with no limit are skipped
    after printing a notice.
    """
    if expected_matches and limit:
        raise ValueError("Bad test - TogoWS makes no promises about order")
    total = TogoWS.search_count(database, search_term)
    if expected_matches and total < len(expected_matches):
        raise ValueError("Only %i matches, expected at least %i" % (total, len(expected_matches)))
    if limit:
        wanted = min(total, limit)
    else:
        if total > 5000:
            print("%i results, skipping" % total)
            return
        wanted = total
    # Iteration should find everything... unless a limit is used
    hits = list(TogoWS.search_iter(database, search_term, limit))
    self.assertEqual(wanted, len(hits))
    for wanted_id in expected_matches:
        self.assertTrue(wanted_id in hits, "Expected %s in results but not" % wanted_id)
def test_embl_AM905444(self):
    """Bio.TogoWS.entry("embl", "AM905444")

    Parses the returned record as EMBL and checks its id, name,
    description, length and sequence checksum.
    """
    handle = TogoWS.entry("embl", "AM905444")
    try:
        record = SeqIO.read(handle, "embl")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    self.assertIn("AM905444", record.id)
    self.assertIn("AM905444", record.name)
    self.assertIn("porin", record.description)
    self.assertEqual(len(record), 1164)
    self.assertEqual(seguid(record.seq), "G0HtLpwF7i4FXUaUjDUPTjok79c")
def test_pubmed_16381885_au(self):
    """Bio.TogoWS.entry("pubmed", "16381885", field="au")

    Requests only the "au" field and checks the list of author names
    obtained by splitting the response on newlines.
    """
    # Gives one name per line (i.e. \n separated), no dots
    handle = TogoWS.entry("pubmed", "16381885", field="au")
    try:
        data = handle.read().strip().split("\n")
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, ['Kanehisa M', 'Goto S', 'Hattori M',
                            'Aoki-Kinoshita KF', 'Itoh M',
                            'Kawashima S', 'Katayama T', 'Araki M',
                            'Hirakawa M'])
def test_pubmed_16381885_authors(self):
    """Bio.TogoWS.entry("pubmed", "16381885", field="authors")

    Requests only the "authors" field and checks the list of names
    obtained by splitting the response on tabs.
    """
    # Gives names tab separated (i.e. \t separated)
    handle = TogoWS.entry("pubmed", "16381885", field="authors")
    try:
        data = handle.read().strip().split("\t")
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, ['Kanehisa, M.', 'Goto, S.', 'Hattori, M.',
                            'Aoki-Kinoshita, K. F.', 'Itoh, M.',
                            'Kawashima, S.', 'Katayama, T.', 'Araki, M.',
                            'Hirakawa, M.'])
def test_protein_fasta(self):
    """Bio.TogoWS.entry("protein", "16130152", "fasta")

    Parses the returned record as FASTA and checks that the requested
    identifier appears in its id and name, plus the description, length
    and sequence checksum.
    """
    handle = TogoWS.entry("protein", "16130152", "fasta")
    try:
        record = SeqIO.read(handle, "fasta")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    # NOTE(review): NCBI has been phasing out GI numbers - confirm the GI
    # still appears in the FASTA header returned by the service.
    self.assertIn("16130152", record.id)
    self.assertIn("16130152", record.name)
    self.assertIn("porin protein", record.description)
    self.assertEqual(len(record), 367)
    self.assertEqual(seguid(record.seq), "fCjcjMFeGIrilHAn6h+yju267lg")
def test_ddbj(self):
    """Check supported fields for ddbj database

    The field names reported for "ddbj" must include every name listed
    below; on failure the reported set is shown.
    """
    required = ['entry_id', 'length', 'strand', 'moltype',
                'linearity', 'division', 'date', 'definition',
                'accession', 'accessions', 'version', 'versions',
                'acc_version', 'gi', 'keywords', 'organism',
                'common_name', 'taxonomy', 'comment', 'seq']
    reported = set(TogoWS._get_entry_fields("ddbj"))
    self.assertTrue(reported.issuperset(required), reported)
def test_nucleotide_fasta(self):
    """Bio.TogoWS.entry("nucleotide", "6273291", "fasta")

    Parses the returned record as FASTA and checks the accession in its
    id and name, plus its length and sequence checksum.
    """
    handle = TogoWS.entry("nucleotide", "6273291", "fasta")
    try:
        record = SeqIO.read(handle, "fasta")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    # NCBI is phasing out GI numbers, so the GI "6273291" is no longer
    # expected in the id/name; check the accession.version instead.
    self.assertIn("AF191665.1", record.id)
    self.assertIn("AF191665.1", record.name)
    self.assertEqual(len(record), 902)
    self.assertEqual(seguid(record.seq), "bLhlq4mEFJOoS9PieOx4nhGnjAQ")
def test_pubmed_16381885(self):
    """Bio.TogoWS.entry("pubmed", "16381885")

    Parses the response with Medline.read and checks the title (TI) and
    author list (AU) of the record.
    """
    # Gives Medline plain text
    handle = TogoWS.entry("pubmed", "16381885")
    try:
        data = Medline.read(handle)
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    self.assertEqual(data["TI"],
                     'From genomics to chemical genomics: new developments in KEGG.')
    self.assertEqual(data["AU"], ['Kanehisa M', 'Goto S', 'Hattori M',
                                  'Aoki-Kinoshita KF', 'Itoh M',
                                  'Kawashima S', 'Katayama T', 'Araki M',
                                  'Hirakawa M'])
def test_databases(self):
    """Check supported databases

    The database names reported by TogoWS._get_entry_dbs() must include
    every name listed below; on failure the reported set is shown.
    """
    dbs = set(TogoWS._get_entry_dbs())
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(dbs.issuperset(['nuccore', 'nucest', 'nucgss',
                                    'nucleotide', 'protein', 'gene',
                                    'omim', 'homologene', 'snp',
                                    'mesh', 'pubmed', 'embl',
                                    'uniprot', 'uniparc', 'uniref100',
                                    'uniref90', 'uniref50', 'ddbj',
                                    'dad', 'pdb', 'compound', 'drug',
                                    'enzyme', 'genes', 'glycan',
                                    'orthology', 'reaction', 'module',
                                    'pathway']), dbs)
def test_protein_fasta(self):
    """Bio.TogoWS.entry("protein", "16130152", "fasta")

    Parses the returned record as FASTA and checks the accession in its
    id and name, plus the description, length and sequence checksum.
    """
    handle = TogoWS.entry("protein", "16130152", "fasta")
    try:
        record = SeqIO.read(handle, "fasta")
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    # NCBI is phasing out GI numbers, so the GI "16130152" is no longer
    # expected in the id/name; check the accession.version instead.
    self.assertIn("NP_416719.1", record.id)
    self.assertIn("NP_416719.1", record.name)
    self.assertIn("porin protein", record.description)
    self.assertEqual(len(record), 367)
    self.assertEqual(seguid(record.seq), "fCjcjMFeGIrilHAn6h+yju267lg")
def test_databases(self):
    """Check supported databases

    Every name in the expected set must be reported by
    TogoWS._get_entry_dbs(); names that are absent are listed in the
    failure message.
    """
    expected = {
        'nuccore', 'nucest', 'nucgss', 'nucleotide', 'protein', 'gene',
        'homologene', 'snp', 'mesh', 'pubmed',
        # 'embl',  (disabled entry, kept for reference)
        'uniprot', 'uniparc', 'uniref100', 'uniref90', 'uniref50',
        'ddbj', 'dad', 'pdb', 'compound', 'drug', 'enzyme', 'genes',
        'glycan', 'orthology', 'reaction', 'module', 'pathway',
    }
    found = set(TogoWS._get_entry_dbs())
    absent = sorted(expected.difference(found))
    self.assertTrue(found.issuperset(expected),
                    "Missing DB: %s" % ", ".join(absent))
def test_uniprot_swiss(self):
    """Bio.TogoWS.entry("uniprot", ["A1AG1_HUMAN","A1AG1_MOUSE"])

    Requests two entries in one call, parses them as "swiss" records and
    checks the id, name, length and sequence checksum of each.
    """
    # Returns "swiss" format:
    handle = TogoWS.entry("uniprot", ["A1AG1_HUMAN", "A1AG1_MOUSE"])
    try:
        # Unpacking fails unless exactly two records are parsed.
        record1, record2 = SeqIO.parse(handle, "swiss")
    finally:
        # Close the handle even if parsing or unpacking raises.
        handle.close()

    self.assertEqual(record1.id, "P02763")
    self.assertEqual(record1.name, "A1AG1_HUMAN")
    self.assertEqual(len(record1), 201)
    self.assertEqual(seguid(record1.seq), "LHDJJ6oC7gUXo8CC7Xn6EUeA8Gk")

    self.assertEqual(record2.id, "Q60590")
    self.assertEqual(record2.name, "A1AG1_MOUSE")
    self.assertEqual(len(record2), 207)
    self.assertEqual(seguid(record2.seq), "FGcj+RFQhP2gRusCmwPFty5PJT0")
def test_pubmed_16381885_and_19850725_authors(self):
    """Bio.TogoWS.entry("pubmed", "16381885,19850725", field="authors")

    Requests the "authors" field for two entries in one call; the
    response is expected to hold one tab separated line of names per entry.
    """
    handle = TogoWS.entry("pubmed", "16381885,19850725", field="authors")
    try:
        # Little hack to remove blank lines...
        # names = handle.read().replace("\n\n", "\n").strip().split("\n")
        names = handle.read().strip().split("\n")
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(2, len(names))
    names1, names2 = names
    self.assertEqual(names1.split("\t"),
                     ['Kanehisa, M.', 'Goto, S.', 'Hattori, M.',
                      'Aoki-Kinoshita, K. F.', 'Itoh, M.',
                      'Kawashima, S.', 'Katayama, T.', 'Araki, M.',
                      'Hirakawa, M.'])
    self.assertEqual(names2.split("\t"),
                     ['Kaminuma, E.', 'Mashima, J.', 'Kodama, Y.',
                      'Gojobori, T.', 'Ogasawara, O.', 'Okubo, K.',
                      'Takagi, T.', 'Nakamura, Y.'])
def test_pubmed_16381885_and_19850725(self):
    """Bio.TogoWS.entry("pubmed", "16381885,19850725")

    Requests two entries in one call, parses them with Medline.parse and
    checks the title (TI) and author list (AU) of each record.
    """
    handle = TogoWS.entry("pubmed", "16381885,19850725")
    try:
        records = list(Medline.parse(handle))
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    self.assertEqual(len(records), 2)
    self.assertEqual(records[0]["TI"],
                     'From genomics to chemical genomics: new developments in KEGG.')
    self.assertEqual(records[0]["AU"],
                     ['Kanehisa M', 'Goto S', 'Hattori M',
                      'Aoki-Kinoshita KF', 'Itoh M',
                      'Kawashima S', 'Katayama T', 'Araki M',
                      'Hirakawa M'])
    self.assertEqual(records[1]["TI"],
                     'DDBJ launches a new archive database with analytical tools '
                     + 'for next-generation sequence data.')
    self.assertEqual(records[1]["AU"],
                     ['Kaminuma E', 'Mashima J', 'Kodama Y',
                      'Gojobori T', 'Ogasawara O', 'Okubo K',
                      'Takagi T', 'Nakamura Y'])
def test_pubmed_16381885(self):
    """Bio.TogoWS.entry("pubmed", "16381885")

    Parses the response with Medline.read and checks the title (TI) and
    author list (AU) of the record.
    """
    # Gives Medline plain text
    handle = TogoWS.entry("pubmed", "16381885")
    try:
        data = Medline.read(handle)
    finally:
        # Close the handle even if parsing raises.
        handle.close()
    self.assertEqual(data["TI"], "From genomics to chemical genomics: new developments in KEGG.")
    self.assertEqual(
        data["AU"],
        [
            "Kanehisa M",
            "Goto S",
            "Hattori M",
            "Aoki-Kinoshita KF",
            "Itoh M",
            "Kawashima S",
            "Katayama T",
            "Araki M",
            "Hirakawa M",
        ],
    )
def test_databases(self):
    """Check supported databases

    The database names reported by TogoWS._get_entry_dbs() must include
    every name listed below; on failure the reported set is shown.
    """
    required = [
        "nuccore",
        "nucest",
        "nucgss",
        "nucleotide",
        "protein",
        "gene",
        "omim",
        "homologene",
        "snp",
        "mesh",
        "pubmed",
        "embl",
        "uniprot",
        "uniparc",
        "uniref100",
        "uniref90",
        "uniref50",
        "ddbj",
        "dad",
        "pdb",
        "compound",
        "drug",
        "enzyme",
        "genes",
        "glycan",
        "orthology",
        "reaction",
        "module",
        "pathway",
    ]
    reported = set(TogoWS._get_entry_dbs())
    self.assertTrue(reported.issuperset(required), reported)
def test_genbank_to_embl(self):
    """Conversion of GenBank to EMBL.

    Parses the local GenBank file directly, then sends the same file
    through TogoWS.convert and checks both routes give the same sequence.
    """
    filename = "GenBank/NC_005816.gb"
    old = SeqIO.read(filename, "gb")
    # Open the input inside a with-block so the file is always closed;
    # passing a bare open(...) left the handle dangling.
    with open(filename) as handle:
        new = SeqIO.read(TogoWS.convert(handle, "genbank", "embl"), "embl")
    self.assertEqual(str(old.seq), str(new.seq))
def test_pdb(self):
    """Check supported fields for pdb database.

    The field names reported for "pdb" must include the four names
    listed below; on failure the reported set is shown.
    """
    required = ["accession", "chains", "keywords", "models"]
    reported = set(TogoWS._get_entry_fields("pdb"))
    self.assertTrue(reported.issuperset(required), reported)
def test_uniprot(self):
    """Check supported fields for uniprot database.

    The field names reported for "uniprot" must include the three names
    listed below; on failure the reported set is shown.
    """
    required = ["definition", "entry_id", "seq"]
    reported = set(TogoWS._get_entry_fields("uniprot"))
    self.assertTrue(reported.issuperset(required), reported)
def test_nucleotide_genbank_length(self):
    """Bio.TogoWS.entry("nucleotide", "X52960", field="length")

    Requests only the length field and compares the stripped response
    text with the expected value.
    """
    handle = TogoWS.entry("nucleotide", "X52960", field="length")
    try:
        data = handle.read().strip()  # ignore trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, "248")
def test_embl_AM905444_seq(self):
    """Bio.TogoWS.entry("embl", "AM905444", field="seq")

    Requests only the seq field and compares the checksum (SEGUID) of
    the stripped response text with the expected value.
    """
    handle = TogoWS.entry("embl", "AM905444", field="seq")
    try:
        data = handle.read().strip()  # ignore any trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(seguid(data), "G0HtLpwF7i4FXUaUjDUPTjok79c")
def test_embl_AM905444_definition(self):
    """Bio.TogoWS.entry("embl", "AM905444", field="definition")

    Requests only the definition field and compares the stripped
    response text with the expected description.
    """
    handle = TogoWS.entry("embl", "AM905444", field="definition")
    try:
        data = handle.read().strip()  # ignore any trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, "Herbaspirillum seropedicae locus tag HS193.0074 for porin")
def test_nucleotide_genbank_seq(self):
    """Bio.TogoWS.entry("nucleotide", "X52960", field="seq")

    Requests only the seq field and compares the checksum (SEGUID) of
    the stripped response text with the expected value.
    """
    handle = TogoWS.entry("nucleotide", "X52960", field="seq")
    try:
        data = handle.read().strip()  # ignore trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(seguid(data), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
def test_embl(self):
    """Check supported fields for embl database

    The field names reported for "embl" must include the three names
    listed below; on failure the reported set is shown.
    """
    required = ["definition", "entry_id", "seq"]
    reported = set(TogoWS._get_entry_fields("embl"))
    self.assertTrue(reported.issuperset(required), reported)
def test_nucleotide_genbank_definition(self):
    """Bio.TogoWS.entry("nucleotide", "X52960", field="definition")

    Requests only the definition field and compares the stripped
    response text with the expected description.
    """
    handle = TogoWS.entry("nucleotide", "X52960", field="definition")
    try:
        data = handle.read().strip()  # ignore trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, "Coleus blumei viroid 1 (CbVd) RNA.")
def test_nucleotide_genbank_acc_version(self):
    """Bio.TogoWS.entry("nucleotide", "X52960", field="acc_version")

    Requests only the acc_version field and compares the stripped
    response text with the expected accession.version string.
    """
    handle = TogoWS.entry("nucleotide", "X52960", field="acc_version")
    try:
        data = handle.read().strip()  # ignore trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, "X52960.1")
def test_nucleotide_genbank_organism(self):
    """Bio.TogoWS.entry("nucleotide", "X52960", field="organism")

    Requests only the organism field and compares the stripped response
    text with the expected organism name.
    """
    handle = TogoWS.entry("nucleotide", "X52960", field="organism")
    try:
        data = handle.read().strip()  # ignore trailing \n
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertEqual(data, "Coleus blumei viroid 1")
def test_uniprot(self):
    """Check supported fields for uniprot database

    The field names reported for "uniprot" must include the three names
    listed below; on failure the reported set is shown.
    """
    needed = ["definition", "entry_id", "seq"]
    offered = set(TogoWS._get_entry_fields("uniprot"))
    self.assertTrue(offered.issuperset(needed), offered)
def test_pubmed(self):
    """Check supported fields for pubmed database

    The field names reported for "pubmed" must include every name listed
    below; on failure the reported set is shown.
    """
    fields = set(TogoWS._get_entry_fields("pubmed"))
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(fields.issuperset(['abstract', 'au', 'authors',
                                       'doi', 'mesh', 'so', 'ti',
                                       'title']), fields)
def test_pubmed(self):
    """Check supported fields for pubmed database

    The field names reported for "pubmed" must include every name listed
    below; on failure the reported set is shown.
    """
    required = ['abstract', 'au', 'authors', 'doi', 'mesh', 'so', 'title']
    reported = set(TogoWS._get_entry_fields("pubmed"))
    self.assertTrue(reported.issuperset(required), reported)
def test_ddbj_gff3(self):
    """Bio.TogoWS.entry("ddbj", "X52960", format="gff")

    Requests the entry in GFF format and checks the start of the
    returned text: the GFF3 version line followed by the first record line.
    """
    handle = TogoWS.entry("ddbj", "X52960", format="gff")
    try:
        data = handle.read()
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    self.assertTrue(data.startswith("##gff-version 3\nX52960\tDDBJ\t"), data)
def test_embl_AM905444_gff3(self):
    """Bio.TogoWS.entry("embl", "AM905444", format="gff")

    Requests the entry in GFF format and checks the start of the
    returned text: the GFF3 version line followed by the first record line.
    """
    handle = TogoWS.entry("embl", "AM905444", format="gff")
    try:
        data = handle.read()
    finally:
        # Close the handle even if reading fails part-way.
        handle.close()
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(data.startswith("##gff-version 3\nAM905444\tembl\t"), data)
def test_pdb(self):
    """Check supported fields for pdb database

    The field names reported for "pdb" must include the four names
    listed below; on failure the reported set is shown.
    """
    needed = ["accession", "chains", "keywords", "models"]
    offered = set(TogoWS._get_entry_fields("pdb"))
    self.assertTrue(offered.issuperset(needed), offered)