def externalEntityRefHandler(self, context, base, systemId, publicId):
    """Handle external entity reference in order to cache DTD locally.

    The purpose of this function is to load the DTD locally, instead
    of downloading it from the URL specified in the XML. Using the local
    DTD results in much faster parsing. If the DTD is not found locally,
    we try to download it, save a copy through ``self.save_dtd_file`` and
    parse the downloaded bytes. If new DTDs become available from NCBI,
    putting them in Bio/Entrez/DTDs will allow the parser to see them.

    Arguments:
     - context  - opaque value handed on to ``ExternalEntityParserCreate``.
     - base     - not used by this handler.
     - systemId - absolute (http, https, ftp) or relative URL of the DTD.
     - publicId - not used by this handler.

    Returns 1 once the DTD has been parsed.

    Raises ValueError if systemId uses any other URL scheme, and
    RuntimeError if the DTD is not available locally and cannot be
    downloaded.
    """
    # Index 0 of the parse result is the URL scheme.
    scheme = _urlparse(systemId)[0]
    if scheme in ("http", "https", "ftp"):
        # Then this is an absolute path to the DTD.
        url = systemId
    elif scheme == "":
        # Then this is a relative path to the DTD.
        # Look at the parent URL to find the full path.
        try:
            source = self.dtd_urls[-1]
        except IndexError:
            # Assume the default URL for DTDs if the top parent
            # does not contain an absolute path
            source = "http://www.ncbi.nlm.nih.gov/dtd/"
        else:
            source = os.path.dirname(source)
        # urls always have a forward slash, don't use os.path.join
        url = source.rstrip("/") + "/" + systemId
    else:
        raise ValueError("Unexpected URL scheme %r" % (scheme))
    self.dtd_urls.append(url)
    # From here on the URL stack must be restored even if loading or
    # parsing the DTD fails, otherwise later relative lookups would be
    # resolved against a stale parent.
    try:
        # First, try to load the local version of the DTD file
        filename = os.path.basename(systemId)
        handle = self.open_dtd_file(filename)
        if not handle:
            # DTD is not available as a local file. Try accessing it through
            # the internet instead.
            try:
                handle = _urlopen(url)
            except IOError:
                _raise_from(
                    RuntimeError("Failed to access %s at %s" % (filename, url)),
                    None,
                )
            try:
                text = handle.read()
            finally:
                handle.close()
            self.save_dtd_file(filename, text)
            handle = BytesIO(text)
        try:
            parser = self.parser.ExternalEntityParserCreate(context)
            parser.ElementDeclHandler = self.elementDecl
            parser.ParseFile(handle)
        finally:
            handle.close()
    finally:
        self.dtd_urls.pop()
    return 1
def externalEntityRefHandler(self, context, base, systemId, publicId):
    """Handle external entity reference in order to cache DTD locally.

    The purpose of this function is to load the DTD locally, instead
    of downloading it from the URL specified in the XML. Using the local
    DTD results in much faster parsing. If the DTD is not found locally,
    we try to download it, save a copy through ``self.save_dtd_file`` and
    parse the downloaded bytes. If new DTDs become available from NCBI,
    putting them in Bio/Entrez/DTDs will allow the parser to see them.

    Arguments:
     - context  - opaque value handed on to ``ExternalEntityParserCreate``.
     - base     - not used by this handler.
     - systemId - absolute (http, https, ftp) or relative URL of the DTD.
     - publicId - not used by this handler.

    Returns 1 once the DTD has been parsed.

    Raises ValueError if systemId uses any other URL scheme, and
    RuntimeError if the DTD is not available locally and cannot be
    downloaded.
    """
    # Index 0 of the parse result is the URL scheme.
    scheme = _urlparse(systemId)[0]
    if scheme in ("http", "https", "ftp"):
        # Then this is an absolute path to the DTD.
        url = systemId
    elif scheme == "":
        # Then this is a relative path to the DTD.
        # Look at the parent URL to find the full path.
        try:
            source = self.dtd_urls[-1]
        except IndexError:
            # Assume the default URL for DTDs if the top parent
            # does not contain an absolute path
            source = "http://www.ncbi.nlm.nih.gov/dtd/"
        else:
            source = os.path.dirname(source)
        # urls always have a forward slash, don't use os.path.join
        url = source.rstrip("/") + "/" + systemId
    else:
        raise ValueError("Unexpected URL scheme %r" % (scheme))
    self.dtd_urls.append(url)
    # Keep the URL stack balanced even when loading or parsing fails, so
    # that a later relative DTD is not resolved against a stale parent.
    try:
        # First, try to load the local version of the DTD file
        filename = os.path.basename(systemId)
        handle = self.open_dtd_file(filename)
        if not handle:
            # DTD is not available as a local file. Try accessing it through
            # the internet instead.
            try:
                handle = _urlopen(url)
            except IOError:
                raise RuntimeError("Failed to access %s at %s" % (filename, url))
            try:
                text = handle.read()
            finally:
                handle.close()
            self.save_dtd_file(filename, text)
            handle = BytesIO(text)
        try:
            parser = self.parser.ExternalEntityParserCreate(context)
            parser.ElementDeclHandler = self.elementDecl
            parser.ParseFile(handle)
        finally:
            handle.close()
    finally:
        self.dtd_urls.pop()
    return 1
def externalEntityRefHandler(self, context, base, systemId, publicId):
    """Handle an external entity reference by loading the DTD locally.

    The purpose of this function is to load the DTD locally, instead
    of downloading it from the URL specified in the XML. Using the local
    DTD results in much faster parsing. If the DTD is not found locally,
    we issue a warning and try to download it. If new DTDs become
    available from NCBI, putting them in Bio/Entrez/DTDs will allow the
    parser to see them.

    Arguments:
     - context  - opaque value handed on to ``ExternalEntityParserCreate``.
     - base     - not used by this handler.
     - systemId - absolute (http, https, ftp) or relative URL of the DTD.
     - publicId - not used by this handler.

    Returns 1 once the DTD has been parsed.

    Raises ValueError if systemId uses any other URL scheme, and
    RuntimeError if the DTD is not available locally and cannot be
    downloaded.
    """
    # Index 0 of the parse result is the URL scheme.
    scheme = _urlparse(systemId)[0]
    if scheme in ("http", "https", "ftp"):
        # Then this is an absolute path to the DTD.
        url = systemId
    elif scheme == "":
        # Then this is a relative path to the DTD.
        # Look at the parent URL to find the full path.
        try:
            source = self.dtd_urls[-1]
        except IndexError:
            # Assume the default URL for DTDs if the top parent
            # does not contain an absolute path
            source = "http://www.ncbi.nlm.nih.gov/dtd/"
        else:
            source = os.path.dirname(source)
        # urls always have a forward slash, don't use os.path.join
        url = source.rstrip("/") + "/" + systemId
    else:
        # Fail with a clear message rather than leaving ``url`` unbound.
        raise ValueError("Unexpected URL scheme %r" % (scheme))
    self.dtd_urls.append(url)
    # Keep the URL stack balanced even when loading or parsing fails, so
    # that a later relative DTD is not resolved against a stale parent.
    try:
        # First, try to load the local version of the DTD file
        filename = os.path.basename(systemId)
        handle = self.open_dtd_file(filename)
        if not handle:
            # DTD is not available as a local file. Try accessing it through
            # the internet instead.
            message = """\
Unable to load DTD file %s.
Bio.Entrez uses NCBI's DTD files to parse XML files returned by NCBI Entrez.
Though most of NCBI's DTD files are included in the Biopython distribution,
sometimes you may find that a particular DTD file is missing. While we can
access the DTD file through the internet, the parser is much faster if the
required DTD files are available locally.
For this purpose, please download %s from
%s
and save it either in directory
%s
or in directory
%s
in order for Bio.Entrez to find it.
Alternatively, you can save %s in the directory
Bio/Entrez/DTDs in the Biopython distribution, and reinstall Biopython.

Please also inform the Biopython developers about this missing DTD, by
reporting a bug on https://github.com/biopython/biopython/issues or sign
up to our mailing list and emailing us, so that we can include it with the
next release of Biopython.
Proceeding to access the DTD file through the internet...
""" % (
                filename,
                filename,
                url,
                self.global_dtd_dir,
                self.local_dtd_dir,
                filename,
            )
            warnings.warn(message)
            try:
                handle = _urlopen(url)
            except IOError:
                raise RuntimeError("Failed to access %s at %s" % (filename, url))
        try:
            parser = self.parser.ExternalEntityParserCreate(context)
            parser.ElementDeclHandler = self.elementDecl
            parser.ParseFile(handle)
        finally:
            handle.close()
    finally:
        self.dtd_urls.pop()
    return 1
def externalEntityRefHandler(self, context, base, systemId, publicId):
    """Load the DTD named by an external entity reference and parse it.

    A local copy of the DTD (looked up by file name through
    ``self.open_dtd_file``) is preferred over the URL given in the XML,
    because parsing from a local file is much faster. When no local copy
    exists, a warning explaining how to install one is issued and the DTD
    is fetched from its URL instead. If new DTDs become available from
    NCBI, putting them in Bio/Entrez/DTDs will allow the parser to see
    them.

    Arguments:
     - context  - opaque value handed on to ``ExternalEntityParserCreate``.
     - base     - not used by this handler.
     - systemId - absolute (http, https, ftp) or relative URL of the DTD.
     - publicId - not used by this handler.

    Returns 1 once the DTD has been parsed.

    Raises ValueError if systemId uses any other URL scheme, and
    RuntimeError if the DTD is not available locally and cannot be
    downloaded.
    """
    # Element 0 of the parsed URL is its scheme.
    scheme = _urlparse(systemId)[0]
    if scheme in ("http", "https", "ftp"):
        # Absolute location of the DTD.
        url = systemId
    elif scheme == "":
        # Relative location: resolve it against the DTD currently being
        # processed, if there is one.
        try:
            parent = self.dtd_urls[-1]
        except IndexError:
            # No parent DTD on the stack; fall back on NCBI's DTD folder.
            source = "http://www.ncbi.nlm.nih.gov/dtd/"
        else:
            source = os.path.dirname(parent)
        # urls always have a forward slash, don't use os.path.join
        url = source.rstrip("/") + "/" + systemId
    else:
        # Fail with a clear message rather than leaving ``url`` unbound.
        raise ValueError("Unexpected URL scheme %r" % (scheme))
    self.dtd_urls.append(url)
    # The matching pop must happen on every exit path, otherwise a later
    # relative DTD would be resolved against a stale parent URL.
    try:
        # First, try to load the local version of the DTD file
        filename = os.path.basename(systemId)
        handle = self.open_dtd_file(filename)
        if not handle:
            # DTD is not available as a local file. Try accessing it through
            # the internet instead.
            message = """\
Unable to load DTD file %s.
Bio.Entrez uses NCBI's DTD files to parse XML files returned by NCBI Entrez.
Though most of NCBI's DTD files are included in the Biopython distribution,
sometimes you may find that a particular DTD file is missing. While we can
access the DTD file through the internet, the parser is much faster if the
required DTD files are available locally.
For this purpose, please download %s from
%s
and save it either in directory
%s
or in directory
%s
in order for Bio.Entrez to find it.
Alternatively, you can save %s in the directory
Bio/Entrez/DTDs in the Biopython distribution, and reinstall Biopython.

Please also inform the Biopython developers about this missing DTD, by
reporting a bug on https://github.com/biopython/biopython/issues or sign
up to our mailing list and emailing us, so that we can include it with the
next release of Biopython.
Proceeding to access the DTD file through the internet...
""" % (
                filename,
                filename,
                url,
                self.global_dtd_dir,
                self.local_dtd_dir,
                filename,
            )
            warnings.warn(message)
            try:
                handle = _urlopen(url)
            except IOError:
                raise RuntimeError("Failed to access %s at %s" % (filename, url))
        try:
            parser = self.parser.ExternalEntityParserCreate(context)
            parser.ElementDeclHandler = self.elementDecl
            parser.ParseFile(handle)
        finally:
            handle.close()
    finally:
        self.dtd_urls.pop()
    return 1