def open_local_file(self, url):
    """Open the local file named by *url* and wrap it for URL-style reading.

    The path part of *url* is converted with ``url2pathname`` and stat'ed;
    ``Content-Type`` (guessed from the URL, falling back to ``text/plain``),
    ``Content-Length`` and ``Last-modified`` headers are synthesized from
    the result.

    Returns:
        The ``addinfourl`` object wrapping the file (opened in ``'rb'``
        mode), the synthesized headers and the reported URL.

    Raises:
        IOError: if the file cannot be stat'ed (errno, strerror and
            filename are copied from the underlying ``OSError``), or with
            ``('local file error', 'not on local host')`` when *url* names
            a host with an explicit port or one that does not resolve to
            the address given by ``localhost()`` or ``thishost()``.
    """
    import mimetypes
    import mimetools
    import email.utils
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    host, file = splithost(url)
    localname = url2pathname(file)
    try:
        stats = os.stat(localname)
    except OSError as e:
        # Callers of this method catch IOError, so translate the failure.
        raise IOError(e.errno, e.strerror, e.filename)

    size = stats.st_size
    modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    headers = mimetools.Message(StringIO(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (mtype or 'text/plain', size, modified)))

    if host:
        # A host is acceptable only without a port and only when it
        # resolves to this machine.
        host, port = splitport(host)
        if port or socket.gethostbyname(host) not in (localhost(), thishost()):
            # Call-form raise: same exception as the old
            # ``raise IOError, (...)`` statement, but also valid on Python 3.
            raise IOError('local file error', 'not on local host')

    # Absolute paths are reported back as file:// URLs.
    urlfile = file
    if file[:1] == '/':
        urlfile = 'file://' + file
    return addinfourl(open(localname, 'rb'), headers, urlfile)
def _parse_image(self, element):
    """Copy the image referenced by *element* and record it by caption.

    The element's ``src`` attribute is converted with
    ``nturl2path.url2pathname``. The caption text is taken from the
    previous caption if one is found, otherwise from the next one. When
    the caption is the empty string nothing is copied or recorded.
    """
    source_path = nturl2path.url2pathname(element.attrib['src'])

    # Find caption text: the preceding caption wins over the following one.
    caption = ''
    before = self._find_prev_caption_text(element)
    if before['found']:
        caption = before['prev'].text
    else:
        after = self._find_next_caption_text(element)
        if after['found']:
            caption = after['next'].text

    if caption == '':
        return

    # Build the file name from the caption: drop every character outside
    # a-z, ä-ö and 0-9 (case-insensitively), plus an "s" that directly
    # follows a quote character.
    stem = re.sub(r'(?:[^a-zä-ö0-9]|(?<=[\'"])s)', r'', caption,
                  flags=re.IGNORECASE)
    file_name = '{}.jpg'.format(stem)

    # Images go into "<save file name without extension>_images".
    save_stem = os.path.splitext(self._save_path)[0]
    folder = '{}_images'.format(os.path.basename(save_stem))
    target_path = os.path.join(folder, file_name)

    self._copy_rename_image_files(target_path, source_path)
    self._images.append({
        'name': self._convert_image_name(caption),
        'image': target_path
    })
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """Fetch *url* into a file and return ``(filename, headers)``.

    Args:
        url: the URL to fetch; it is normalized with ``unwrap(toBytes(url))``.
        filename: destination path. When omitted, a local ``file`` URL is
            returned in place (no copy is made); anything else is written
            to a temporary file whose suffix matches the URL path.
        reporthook: optional callable invoked as
            ``reporthook(blocknum, blocksize, totalsize)`` once before the
            first read and once after every block written. ``totalsize``
            is -1 when no Content-Length header is present.
        data: passed through to ``self.open``.

    Returns:
        A ``(filename, headers)`` tuple. The tuple is also stored in
        ``self.tempcache`` when that cache is not None.

    Raises:
        ContentTooShortError: when fewer bytes were read than the
            Content-Length header announced.
    """
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = splittype(url)
    if filename is None and (not type or type == 'file'):
        # Try to serve the request straight from the local file system;
        # on failure fall through to the generic opener below.
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return (url2pathname(splithost(url1)[1]), hdrs)
        except IOError:
            pass
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            # Reduce the URL to its bare path so the temporary file can
            # carry the same extension.
            garbage, path = splittype(url)
            garbage, path = splithost(path or '')
            path, garbage = splitquery(path or '')
            path, garbage = splitattr(path or '')
            suffix = os.path.splitext(path)[1]
            fd, filename = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = (filename, headers)
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 8192
            size = -1
            read = 0
            blocknum = 0
            # Parse Content-Length whether or not a reporthook was given:
            # the truncation check after the download depends on ``size``.
            if 'content-length' in headers:
                size = int(headers['Content-Length'])
            if reporthook:
                reporthook(blocknum, bs, size)
            while 1:
                block = fp.read(bs)
                if block == '':
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # Raise if the actual size does not match the Content-Length header.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            'retrieval incomplete: got only %i out of %i bytes' % (read, size),
            result)
    return result
def open_local_file(self, url):
    """Return the local file addressed by *url* as an ``addinfourl`` object.

    The path component of *url* is mapped to a file system name with
    ``url2pathname``. Headers are built from the file's stat data:
    ``Content-Length`` from its size, ``Last-modified`` from its mtime and
    ``Content-Type`` from ``mimetypes.guess_type`` (``text/plain`` when no
    type can be guessed).

    Returns:
        ``addinfourl(fileobj, headers, urlfile)``, where the file is opened
        in binary read mode and ``urlfile`` is the path, prefixed with
        ``file://`` when it starts with a slash.

    Raises:
        IOError: when ``os.stat`` fails (re-raised from ``OSError`` with
            the same errno, strerror and filename), or as
            ``('local file error', 'not on local host')`` when a host is
            given with a port or does not resolve to the address returned
            by ``localhost()`` or ``thishost()``.
    """
    import mimetypes
    import mimetools
    import email.utils
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    host, file = splithost(url)
    localname = url2pathname(file)
    try:
        stats = os.stat(localname)
    except OSError as e:
        # Surface stat failures as IOError, the type this API raises.
        raise IOError(e.errno, e.strerror, e.filename)

    size = stats.st_size
    modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    headers = mimetools.Message(
        StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))

    if host:
        # Only a port-less host that resolves to this machine is accepted.
        host, port = splitport(host)
        if port or socket.gethostbyname(host) not in (localhost(), thishost()):
            # Exception-instance form replaces the Python-2-only
            # ``raise IOError, (...)`` statement; the exception is the same.
            raise IOError('local file error', 'not on local host')

    # Both accepted cases report the URL the same way.
    urlfile = file
    if file[:1] == '/':
        urlfile = 'file://' + file
    return addinfourl(open(localname, 'rb'), headers, urlfile)
def OnLinkClicked(self, linkinfo):
    """Route a clicked link to the index, a local file or a module doc.

    The link target is split on ``'.html'``:

    * two pieces ``(module, anchor)`` load that module's documentation;
    * a single piece is the index when it is ``'.'``, a file to open when
      it starts with ``'file:'``, and otherwise an anchor inside the
      current module (``self.moduleName``);
    * any other number of pieces is ignored.
    """
    pieces = linkinfo.GetHref().split('.html')

    if len(pieces) == 2:
        module_name, anchor = pieces
        self.loadDoc(module_name, anchor)
        return
    if len(pieces) != 1:
        return

    target = pieces[0]
    if target == '.':
        self.index()
    elif target.startswith('file:'):
        # Strip the scheme prefix and convert the rest to a local path.
        self.open(nturl2path.url2pathname(target[5:]))
    else:
        self.loadDoc(self.moduleName, target)
def OnLinkClicked(self, linkinfo):
    """Handle a click on a hyperlink.

    The href is split around ``'.html'``. One fragment means an in-page
    target: ``'.'`` shows the index, a ``'file:'`` prefix opens the
    referenced file, and anything else is loaded as an anchor of
    ``self.moduleName``. Two fragments are read as ``(module, anchor)``
    and that document is loaded. Other fragment counts do nothing.
    """
    fragments = linkinfo.GetHref().split('.html')
    count = len(fragments)

    if count == 1:
        (anchor,) = fragments
        if anchor == '.':
            self.index()
            return
        if anchor[:5] == 'file:':
            # Everything after the scheme prefix is a URL-style path.
            local_path = nturl2path.url2pathname(anchor[5:])
            self.open(local_path)
            return
        self.loadDoc(self.moduleName, anchor)
    elif count == 2:
        module_name, anchor = fragments
        self.loadDoc(module_name, anchor)
# Demonstrates nturl2path: convert a Windows path to a URL path and back.
import nturl2path

file = r"c:\my\little\pony"

# Convert once and reuse the result for the reverse conversion.
url = nturl2path.pathname2url(file)
print(url)
print(nturl2path.url2pathname(url))

## Sample output recorded with the original snippet
## (NOTE(review): drive-letter rendering may vary by Python version -- confirm):
## ///C|/my/little/pony
## C:\my\little\pony
# Demonstrates nturl2path: convert a Windows path to a URL path and back.
import nturl2path

file = r"c:\my\little\pony"

# print(...) with a single argument behaves the same as the Python 2 print
# statement and is also valid on Python 3, where the statement form is a
# SyntaxError.
print(nturl2path.pathname2url(file))
print(nturl2path.url2pathname(nturl2path.pathname2url(file)))

## Sample output recorded with the original snippet
## (NOTE(review): drive-letter rendering may vary by Python version -- confirm):
## ///C|/my/little/pony
## C:\my\little\pony
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """Download *url* and return a ``(filename, headers)`` tuple.

    A cached result from ``self.tempcache`` is returned when available.
    Without an explicit *filename*, a URL with no scheme or the ``file``
    scheme is answered with the local path itself; if that attempt raises
    IOError the generic opener is used instead. Remote content is written
    to *filename*, or to a fresh temporary file whose suffix is taken from
    the URL path.

    *reporthook*, when given, is called as
    ``reporthook(blocknum, blocksize, totalsize)`` before the first read
    and after each block written; ``totalsize`` is -1 if the response has
    no Content-Length header.

    Raises ContentTooShortError when fewer bytes arrive than Content-Length
    announced.
    """
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]

    scheme, remainder = splittype(url)
    if filename is None and (not scheme or scheme == 'file'):
        try:
            local_fp = self.open_local_file(remainder)
            local_headers = local_fp.info()
            local_fp.close()
            return (url2pathname(splithost(remainder)[1]), local_headers)
        except IOError:
            # Not a usable local file; fall back to the generic opener.
            pass

    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            tfp = open(filename, 'wb')
        else:
            import tempfile
            # Strip scheme, host, query and attributes to get the bare
            # path, whose extension becomes the temporary file's suffix.
            path = splittype(url)[1]
            path = splithost(path or '')[1]
            path = splitquery(path or '')[0]
            path = splitattr(path or '')[0]
            suffix = os.path.splitext(path)[1]
            fd, filename = tempfile.mkstemp(suffix)
            self.__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'wb')
        try:
            result = (filename, headers)
            if self.tempcache is not None:
                self.tempcache[url] = result

            bs = 8192
            blocknum = 0
            read = 0
            size = -1
            if 'content-length' in headers:
                size = int(headers['Content-Length'])

            if reporthook:
                reporthook(blocknum, bs, size)
            while True:
                block = fp.read(bs)
                if block == '':
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()

    # A known Content-Length that was not reached means a truncated download.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            'retrieval incomplete: got only %i out of %i bytes' % (read, size),
            result)
    return result
def update_event(self, inp=-1):
    """Convert the value of input 0 to a path name and publish it on output 0.

    The value read via ``self.input(0)`` is passed through
    ``nturl2path.url2pathname`` and the result is handed to
    ``self.set_output_val(0, ...)``. *inp* is not used by this method.
    """
    url = self.input(0)
    local_path = nturl2path.url2pathname(url)
    self.set_output_val(0, local_path)