def _open(cgi, params={}, post=False): """Helper function to build the URL and open a handle to it (PRIVATE). Open a handle to Entrez. cgi is the URL for the cgi script to access. params is a dictionary with the options to pass to it. Does some simple error checking, and will raise an IOError if it encounters one. This function also enforces the "up to three queries per second rule" to avoid abusing the NCBI servers. """ # NCBI requirement: At most three queries per second. # Equivalently, at least a third of second between queries delay = 0.333333334 current = time.time() wait = _open.previous + delay - current if wait > 0: time.sleep(wait) _open.previous = current + wait else: _open.previous = current # Remove None values from the parameters for key, value in list(params.items()): if value is None: del params[key] # Tell Entrez that we are using Biopython (or whatever the user has # specified explicitly in the parameters or by changing the default) if "tool" not in params: params["tool"] = tool # Tell Entrez who we are if "email" not in params: if email is not None: params["email"] = email else: warnings.warn(""" Email address is not specified. To make use of NCBI's E-utilities, NCBI requires you to specify your email address with each request. As an example, if your email address is [email protected], you can specify it as follows: from Bio_Eutils import Entrez Entrez.email = '*****@*****.**' In case of excessive usage of the E-utilities, NCBI will attempt to contact a user at the email address provided before blocking access to the E-utilities.""", UserWarning) # Open a handle to Entrez. options = _urlencode(params, doseq=True) # print cgi + "?" + options try: if post: # HTTP POST handle = _urlopen(cgi, data=_as_bytes(options)) else: # HTTP GET cgi += "?" + options handle = _urlopen(cgi) except _HTTPError as exception: raise exception return _binary_to_string_handle(handle)
def externalEntityRefHandler(self, context, base, systemId, publicId): """The purpose of this function is to load the DTD locally, instead of downloading it from the URL specified in the XML. Using the local DTD results in much faster parsing. If the DTD is not found locally, we try to download it. If new DTDs become available from NCBI, putting them in Bio/Entrez/DTDs will allow the parser to see them.""" urlinfo = _urlparse(systemId) # Following attribute requires Python 2.5+ # if urlinfo.scheme=='http': if urlinfo[0] == 'http': # Then this is an absolute path to the DTD. url = systemId elif urlinfo[0] == '': # Then this is a relative path to the DTD. # Look at the parent URL to find the full path. try: url = self.dtd_urls[-1] except IndexError: # Assume the default URL for DTDs if the top parent # does not contain an absolute path source = "http://www.ncbi.nlm.nih.gov/dtd/" else: source = os.path.dirname(url) # urls always have a forward slash, don't use os.path.join url = source.rstrip("/") + "/" + systemId self.dtd_urls.append(url) # First, try to load the local version of the DTD file location, filename = os.path.split(systemId) handle = self.open_dtd_file(filename) if not handle: # DTD is not available as a local file. Try accessing it through # the internet instead. try: handle = _urlopen(url) except IOError: raise RuntimeError("Failed to access %s at %s" % (filename, url)) text = handle.read() handle.close() self.save_dtd_file(filename, text) handle = BytesIO(text) parser = self.parser.ExternalEntityParserCreate(context) parser.ElementDeclHandler = self.elementDecl parser.ParseFile(handle) handle.close() self.dtd_urls.pop() return 1
def externalEntityRefHandler(self, context, base, systemId, publicId): """The purpose of this function is to load the DTD locally, instead of downloading it from the URL specified in the XML. Using the local DTD results in much faster parsing. If the DTD is not found locally, we try to download it. If new DTDs become available from NCBI, putting them in Bio/Entrez/DTDs will allow the parser to see them.""" urlinfo = _urlparse(systemId) #Following attribute requires Python 2.5+ #if urlinfo.scheme=='http': if urlinfo[0]=='http': # Then this is an absolute path to the DTD. url = systemId elif urlinfo[0]=='': # Then this is a relative path to the DTD. # Look at the parent URL to find the full path. try: url = self.dtd_urls[-1] except IndexError: # Assume the default URL for DTDs if the top parent # does not contain an absolute path source = "http://www.ncbi.nlm.nih.gov/dtd/" else: source = os.path.dirname(url) # urls always have a forward slash, don't use os.path.join url = source.rstrip("/") + "/" + systemId self.dtd_urls.append(url) # First, try to load the local version of the DTD file location, filename = os.path.split(systemId) handle = self.open_dtd_file(filename) if not handle: # DTD is not available as a local file. Try accessing it through # the internet instead. message = """\ Unable to load DTD file %s. Bio.Entrez uses NCBI's DTD files to parse XML files returned by NCBI Entrez. Though most of NCBI's DTD files are included in the Biopython distribution, sometimes you may find that a particular DTD file is missing. While we can access the DTD file through the internet, the parser is much faster if the required DTD files are available locally. For this purpose, please download %s from %s and save it either in directory %s or in directory %s in order for Bio.Entrez to find it. Alternatively, you can save %s in the directory Bio/Entrez/DTDs in the Biopython distribution, and reinstall Biopython. Please also inform the Biopython developers about this missing DTD, by reporting a bug on https://github.com/biopython/biopython/issues or sign up to our mailing list and emailing us, so that we can include it with the next release of Biopython. Proceeding to access the DTD file through the internet... """ % (filename, filename, url, self.global_dtd_dir, self.local_dtd_dir, filename) warnings.warn(message) try: handle = _urlopen(url) except IOError: raise RuntimeException("Failed to access %s at %s" % (filename, url)) parser = self.parser.ExternalEntityParserCreate(context) parser.ElementDeclHandler = self.elementDecl parser.ParseFile(handle) handle.close() self.dtd_urls.pop() return 1