def find_related(pmid):
    """find_related(pmid) -> ids

    Search PubMed for a list of citations related to pmid.  pmid can
    be a PubMed ID, a MEDLINE UID, or a list (or tuple) of those.

    The query is sent through NCBI.elink with dbfrom='pubmed'.  The
    result is the list of ID strings found in <Id> elements nested
    inside <Link> elements of the response, in the order they appear.

    Raises ValueError if the text of such an <Id> element contains
    anything other than digits.

    """
    class ResultParser(sgmllib.SGMLParser):
        # Parse the ID's out of the HTML-formatted page that PubMed
        # returns.  The format of the page is:
        # [...]
        #   <Link>
        #      <Id>######</Id>
        #      <Score>######</Score>
        #      [...]
        #   </Link>
        # [...]

        # Matches any character that is not a digit.
        _not_pmid_re = re.compile(r'\D')

        def __init__(self):
            sgmllib.SGMLParser.__init__(self)
            self.ids = []       # IDs collected so far, as strings
            self.in_link = 0    # 1 while between <Link> and </Link>
            self.in_id = 0      # 1 while between <Id> and </Id>

        def start_id(self, attributes):
            self.in_id = 1

        def end_id(self):
            self.in_id = 0

        def start_link(self, attributes):
            self.in_link = 1

        def end_link(self):
            self.in_link = 0

        def handle_data(self, data):
            # Only text inside an <Id> that is itself inside a <Link>
            # is of interest; everything else is skipped.
            if not self.in_link or not self.in_id:
                return
            # Everything here should be a PMID.  Check and make sure
            # data really is one.  A PMID should be a string consisting
            # of only integers.  Should I check to make sure it
            # meets a certain minimum length?
            if self._not_pmid_re.search(data):
                raise ValueError(
                    "I expected an ID, but '%s' doesn't look like one." %
                    repr(data))
            self.ids.append(data)

    parser = ResultParser()
    # Accept any list or tuple of IDs (including integer IDs) and
    # collapse it into the comma-separated string sent to elink.
    if isinstance(pmid, (list, tuple)):
        pmid = ','.join(map(str, pmid))
    h = NCBI.elink(dbfrom='pubmed', id=pmid)
    parser.feed(h.read())
    # Force the parser to process anything it is still buffering.
    parser.close()
    return parser.ids