def test3(self):
    """Retrieve PubMed articles related to PMIDs 11812492 and 11774222,
    restricted to publication dates from 1995 onward.

    Mirrors the EUtils example URL:
      dbfrom=pubmed&id=11812492,11774222&db=pubmed&mindate=1995&datetype=pdat
    NOTE: that example is wrong -- it needs the current date too.
    """
    self.setup()
    records = self.client.from_dbids(
        EUtils.DBIds("pubmed", ["11812492", "11774222"]))
    # The "real" end date would be time.strftime("%Y/%m/%d"), but using
    # the actual current date would make the regression tests fail, so
    # the date is pinned to the day the reference data was recorded.
    today = "2003/01/12"
    related = records.neighbor_links(
        daterange=EUtils.DateRange("1995", today, "pdat"))
def testRelatedItems(self): # Get proteins similar to 4579714 (bacteriorhodopsin) which # were published in 2002 client = DBIdsClient.DBIdsClient(eutils=picklestore.client()) results = client.from_dbids(EUtils.DBIds("protein", "4579714")) neighbors = results.neighbor_links("protein", daterange=EUtils.DateRange( "2002/01/01", "2002/12/31", "pdat")) dbids = neighbors.linksetdbs["protein_protein"].dbids print print len( dbids), "sequences similar to GI:4579714 were published in 2002" print "The record identifiers are:", ", ".join(map(str, dbids)) print client.from_dbids(dbids).efetch(retmode="text", rettype="summary").read()
def testHistory(self):
    """Exercise the HistoryClient: server-side search history, result
    slicing, mixing query keys, and fetching records via the history.

    All responses come from the picklestore fixture, so the expected
    counts and identifiers are pinned to the recorded data.
    """
    entrez = HistoryClient.HistoryClient(eutils=picklestore.client())
    results1 = entrez.search("Dalke", field="au",
                             daterange=EUtils.DateRange("1995", "1998"))
    self.assertEquals(len(results1), 10)
    # Collect the per-term hit counts from the parsed query expression.
    sizes = []
    expression = results1.metadata.expression
    for x in expression:
        if isinstance(x, Datatypes.Term):
            n = x.count
            assert n, n  # cannot be 0 or None
            sizes.append(n)
    self.assertEquals(len(sizes), 3)
    if sizes[0] < 30:
        raise AssertionError(sizes)
    # The date terms carry no usable count, recorded as -1.
    self.assertEquals(sizes[1], -1)
    self.assertEquals(sizes[2], -1)
    # Check the structure of the translated query expression tree.
    self.assertEquals(expression.left.term, "Dalke[Author]")
    self.assertEquals(expression.left.field, "Author")
    self.assertEquals(expression.right.left.term, "1995[EDAT]")
    self.assertEquals(expression.right.left.field, "EDAT")
    self.assertEquals(expression.right.right.term, "1998[EDAT]")
    self.assertEquals(expression.right.right.field, "EDAT")
    expected_dbids = Datatypes.DBIds("pubmed", [
        "9454215", "9454196", "9454186", "9390282", "9303476", "9300720",
        "8763495", "8744570", "8566008", "7648552"])
    self.assertEquals(results1.dbids, expected_dbids)
    # this is a no-no, since EDAT isn't a searchable field
    self.failUnlessRaises(EUtils.EUtilsSearchError, entrez.search,
                          "poliovirus AND 1995:1998[EDAT]", db="nucleotide")
    results2 = entrez.search("poliovirus AND 1995:1998[PDAT]", db="pubmed")
    if len(results2) < 1160:
        raise AssertionError(len(results2))
    # Slicing a result set must give the same ids as slicing the id list.
    all_ids = results2.dbids
    self.assertEquals(len(all_ids), len(results2))
    self.assertEquals(all_ids[:20], results2[:20].dbids)
    self.assertEquals(all_ids[5:20], results2[5:20].dbids)
    self.assertEquals(all_ids[-5:], results2[-5:].dbids)
    self.assertEquals(all_ids[-5:-1], results2[-5:-1].dbids)
    self.assertEquals(all_ids[10:-14], results2[10:-14].dbids)
    # This is illegal because pubmed isn't a sequence database
    self.failUnlessRaises(TypeError, results2.efetch, seq_start=0)
    # Try a different database
    results3 = entrez.search("poliovirus AND 1995:1998[PDAT]",
                             db="nucleotide")
    # Make sure I can still access fields from the first database
    self.assertEquals(results1.dbids, expected_dbids)
    # This is illegal because it mixes databases
    self.failUnlessRaises(
        EUtils.EUtilsSearchError, entrez.search,
        "#%s OR #%s" % (results1.query_key, results3.query_key))
    # However, this should yield the same as results3
    results4 = entrez.search("poliovirus", db="nucleotide")
    results5 = entrez.search("#%s AND 1995:1998[PDAT]" % results4.query_key,
                             db=results4.db)
    self.assertEquals(len(results3), len(results5))
    results3_dbids = results3.dbids
    self.assertEquals(results3_dbids, results5.dbids)
    # Get the sequence as FASTA one way
    s = results3[0].efetch(retmode='text', rettype='fasta').read()
    # And another way
    t = entrez.eutils.efetch_using_dbids(results3_dbids[:1],
                                         retmode='text',
                                         rettype='fasta').read()
    self.assertEquals(s, t)
def testClient(self):
    """Drive the low-level EUtils client end to end: esearch with
    history, efetch/esummary via history vs. explicit ids, epost,
    and the elink variants.

    Responses come from the picklestore fixture; the query_key values
    depend on the exact order of calls, so do not reorder them.
    The try/except/print/raise pattern dumps the raw server response
    for debugging before re-raising the assertion failure.
    """
    eutils = picklestore.client()
    infile = eutils.esearch("Dalke", field="au",
                            daterange=EUtils.DateRange("1995", "1998"),
                            retstart=1, retmax=5, usehistory=1,
                            webenv=None)
    s = infile.read()
    try:
        # Sanity-check the raw ESearch XML: counts, paging, ids, terms.
        counts = map(int, re.findall(r"<Count>(-?\d+)</Count>", s))
        assert len(counts) == 4, counts
        assert counts[0] == 10
        assert counts[1] >= 30
        assert counts[2] == -1
        assert counts[3] == -1
        assert s.find("<RetMax>5</RetMax>") != -1
        assert s.find("<RetStart>1</RetStart>") != -1
        ids = re.findall(r"<Id>(\d+)</Id>", s)
        assert len(ids) == 5, ids
        terms = re.findall(r"<Term>([^<]+)</Term>", s)
        assert len(terms) == 3
        assert terms[0] == "Dalke[Author]"
        assert terms[1] == "1995[EDAT]"
        assert terms[2] == "1998[EDAT]"
        query_key1 = re.findall(r"<QueryKey>(\d+)</QueryKey>", s)[0]
        assert query_key1 == "1", query_key1  # always true?
        # WebEnv is URL-quoted inside the XML; unquote before reuse.
        quoted_webenv = re.findall("<WebEnv>([^>]+)</WebEnv>", s)[0]
        webenv = urllib.unquote(quoted_webenv)
    except:
        print "ERROR!"
        print s
        raise
    try:
        # Can I refetch those same Ids using the history?
        t = ""
        t = eutils.efetch_using_history(db="pubmed", webenv=webenv,
                                        query_key=query_key1,
                                        retstart=1, retmax=5,
                                        retmode="text",
                                        rettype="uilist").read()
        new_ids = t.split()
        # Must be in same order too!
        assert ids == new_ids, (ids, new_ids)
    except:
        print "ERROR!"
        print s
        print " -- and --"
        print t
        raise
    # Make sure I'm getting the same XML summary through history and id
    sum1 = sum2 = None
    try:
        sum1 = eutils.esummary_using_history(db="pubmed", webenv=webenv,
                                             query_key=query_key1,
                                             retstart=1, retmax=1).read()
        sum2 = eutils.esummary_using_dbids(
            dbids=EUtils.DBIds("pubmed", [ids[0]])).read()
        assert sum1 == sum2
    except:
        print "Summary 1"
        print sum1
        print "-----------------"
        print "Summary 2"
        print sum2
        raise
    # Make sure I'm getting the same XML version of the records
    rec1 = rec2 = None
    try:
        rec1 = eutils.efetch_using_history(db="pubmed", webenv=webenv,
                                           query_key=query_key1,
                                           retmode="xml",
                                           retstart=1, retmax=1).read()
        rec2 = eutils.efetch_using_dbids(dbids=EUtils.DBIds(
            "pubmed", [ids[0]]), retmode="xml").read()
        assert rec1 == rec2
    except:
        print "Record 1"
        print rec1
        print "-----------------"
        print "Record 2"
        print rec2
        raise
    # Post a few GIs (from the protein database) to the server.
    # This appends to the existing history so should be query_key #2.
    post_ids = ["914034", "5263173", "1769808", "1060883"]
    infile = eutils.epost(EUtils.DBIds("protein", post_ids),
                          webenv=webenv)
    post_results = infile.read()
    try:
        query_key2 = re.findall(r"<QueryKey>(\d+)</QueryKey>",
                                post_results)[0]
        assert query_key2 == "2"
        quoted_webenv = re.findall("<WebEnv>([^>]+)</WebEnv>",
                                   post_results)[0]
        webenv = urllib.unquote(quoted_webenv)
    except:
        print "ERROR"
        print post_results
        raise
    # Verify that the posted ids are correct.
    # NOTE(review): the posted ids are protein GIs but this fetch says
    # db="pubmed" -- presumably the history's query_key dominates here;
    # confirm against the EUtils server behavior.
    posted_ids = eutils.efetch_using_history(
        db="pubmed", webenv=webenv, query_key=query_key2,
        retstart=0, retmax=len(post_ids),
        retmode="text", rettype="uilist").read().split()
    x1 = posted_ids[:]  # Make copies since I need the correct
    x1.sort()           # order for getting the FASTA version, below
    x2 = post_ids[:]
    x2.sort()
    assert x1 == x2, (post_ids, posted_ids)
    # Now fetch them as FASTA format
    fasta1 = fasta2 = None
    try:
        fasta1 = eutils.efetch_using_history(db="protein", webenv=webenv,
                                             query_key=query_key2,
                                             retstart=0,
                                             retmax=len(post_ids),
                                             retmode="text",
                                             rettype="fasta").read()
        fasta2 = eutils.efetch_using_dbids(dbids=EUtils.DBIds(
            "protein", posted_ids), retmode="text",
            rettype="fasta").read()
        assert fasta1 == fasta2
    except:
        print "ERROR FASTA1"
        print fasta1
        print "ERROR FASTA2"
        print fasta2
        raise
    # It's much harder to test the ELink capabilities.
    # Get the VMD paper
    results = None
    try:
        results = eutils.esearch(
            "Humphrey W. AND Dalke A. AND Schulten K. AND VMD[Title]",
            field="au").read()
        # There should only be one match
        ids = re.findall(r"<Id>(\d+)</Id>", results)
        assert ids == ["8744570"]
    except:
        print "Error"
        print results
        raise
    # Look at the related publications and we should find
    # my Tcl paper, which is 9390282
    links = None
    try:
        links = eutils.elink_using_dbids(EUtils.DBIds("pubmed", ids),
                                         cmd="neighbor").read()
        # remember, the first id comes from the <Id> in <IdList>
        related_ids = re.findall(r"<Id>(\d+)</Id>", links)[1:]
        assert "9390282" in related_ids
    except:
        print "Error"
        print links
        raise
    # Get the taxonomy record for the "posted_ids".
    # NOTE: This test originally compared the 2nd element from
    # that list, but elink_using_history doesn't support the
    # retstart/retmax parameters.
    #
    # This comes from query_key2 in the history. Do it both ways
    # to compare results.
    link1 = link2 = None
    try:
        link1 = eutils.elink_using_dbids(EUtils.DBIds(
            "protein", posted_ids), db="taxonomy", cmd="neighbor").read()
        link2 = eutils.elink_using_history(dbfrom="protein",
                                           webenv=webenv,
                                           query_key=query_key2,
                                           db="taxonomy",
                                           cmd="neighbor").read()
        assert link1 == link2
        # Skip the echoed input ids; the remainder are the taxonomy ids.
        taxids = re.findall(r"<Id>(\d+)</Id>", link1)[len(posted_ids):]
        assert taxids == ["43776", "29282", "28442", "2237"], taxids
    except:
        print "Error",
        print link1
        print "----------------"
        print link2
        raise
    # See if there are linkouts for 914034.
    # Should be at least one, to DART 3240.
    llinks = None
    try:
        llinks = eutils.elink_using_dbids(EUtils.DBIds(
            "protein", [posted_ids[1]]), cmd="llinks").read()
        assert llinks.find("<ObjUrl>") != -1
        assert "3240" in re.findall("<Id>(\d+)</Id>", llinks)
    except:
        print "ERROR"
        print llinks
        raise
    # Finally, check that I can limit the search to an Entrez query string.
    # I'm using the example
    #  "retrieve MEDLINE indexed only related articles for PMID 12242737"
    # elink.fcgi?dbfrom=pubmed&id=12242737&db=pubmed&term=medline[sb]
    full = restricted = None
    try:
        full = eutils.elink_using_dbids(
            EUtils.DBIds("pubmed", ["12242737"])).read()
        restricted = eutils.elink_using_dbids(EUtils.DBIds(
            "pubmed", ["12242737"]), term="medline[sb]").read()
        counts1 = full.count("<Link>")
        counts2 = restricted.count("<Link>")
        # The term restriction must strictly shrink the link set.
        assert counts1 > counts2
    except:
        print "ERROR"
        print full
        print "---------"
        print restricted
        raise