Esempio n. 1
0
    def open_local_file(self, url):
        """Open the local file named by the remainder of a ``file:`` URL.

        ``url`` is split into an optional host part and a path. The path is
        converted to a local filename, stat'ed, and opened in binary mode.

        Returns an ``addinfourl`` built from the open file, a
        ``mimetools.Message`` carrying Content-Type, Content-Length and
        Last-modified headers, and the URL to report for the file.

        Raises IOError when the file cannot be stat'ed, when the URL carries
        a port, or when its host does not resolve to one of the addresses
        returned by ``localhost()`` / ``thishost()``.
        """
        import mimetypes
        import mimetools
        import email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO

        host, path = splithost(url)
        localname = url2pathname(path)
        try:
            stats = os.stat(localname)
        except OSError as e:
            # Surface stat failures as IOError with the same details.
            raise IOError(e.errno, e.strerror, e.filename)

        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        # Fall back to text/plain when the type cannot be guessed.
        mtype = mimetypes.guess_type(url)[0] or 'text/plain'
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n'
            % (mtype, size, modified)))

        if host:
            host, port = splitport(host)
            # A host is only acceptable without a port and when it resolves
            # to this machine; the lookup is skipped when a port is present.
            if port or socket.gethostbyname(host) not in (localhost(),
                                                          thishost()):
                # Call form instead of the Python-2-only "raise E, args".
                raise IOError('local file error', 'not on local host')

        # Absolute paths are reported back as file:// URLs.
        urlfile = 'file://' + path if path[:1] == '/' else path
        return addinfourl(open(localname, 'rb'), headers, urlfile)
Esempio n. 2
0
    def _parse_image(self, element):
        """Copy the image referenced by ``element`` and record it by caption.

        The ``src`` attribute is converted to a local path. The caption is
        taken from the preceding caption text when one is found, otherwise
        from the following one. When the caption stays empty, nothing is
        copied or recorded.
        """
        source_path = nturl2path.url2pathname(element.attrib['src'])

        # Prefer the caption before the image; fall back to the one after.
        caption = ''
        before = self._find_prev_caption_text(element)
        if before['found']:
            caption = before['prev'].text
        else:
            after = self._find_next_caption_text(element)
            if after['found']:
                caption = after['next'].text

        if caption == '':
            return

        # File name from the caption: drop every character outside a-z, ä-ö
        # and 0-9 (case-insensitively), plus an "s" that directly follows a
        # quote character.
        stem = re.sub(r'(?:[^a-zä-ö0-9]|(?<=[\'"])s)',
                      r'',
                      caption,
                      flags=re.IGNORECASE)
        document_stem = os.path.splitext(self._save_path)[0]
        images_dir = '{}_images'.format(os.path.basename(document_stem))
        target_path = os.path.join(images_dir, '{}.jpg'.format(stem))

        self._copy_rename_image_files(target_path, source_path)
        self._images.append({
            'name': self._convert_image_name(caption),
            'image': target_path
        })
Esempio n. 3
0
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """Fetch ``url`` into a local file and return ``(filename, headers)``.

        A result already stored in ``self.tempcache`` for the URL is returned
        as is. When no ``filename`` is given and the URL has no scheme or the
        ``file`` scheme, the local file is used directly and its own path is
        returned without copying. Otherwise the data is written to
        ``filename``, or to a new temporary file whose suffix is taken from
        the URL path.

        ``reporthook``, when given, is called as
        ``reporthook(blocknum, blocksize, totalsize)`` once before reading
        and once after each block; ``totalsize`` is -1 when the response has
        no Content-Length header. ``data`` is passed through to
        ``self.open``.

        Raises ContentTooShortError when fewer bytes were read than the
        Content-Length header announced.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        scheme, url1 = splittype(url)
        if filename is None and (not scheme or scheme == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return (url2pathname(splithost(url1)[1]), hdrs)
            except IOError:
                # Not usable as a local file; fall through to self.open().
                pass

        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Strip scheme, host, query and attributes so that only the
                # path's extension ends up as the temp-file suffix.
                garbage, path = splittype(url)
                garbage, path = splithost(path or '')
                path, garbage = splitquery(path or '')
                path, garbage = splitattr(path or '')
                suffix = os.path.splitext(path)[1]
                fd, filename = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = (filename, headers)
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 8192
                size = -1
                read = 0
                blocknum = 0
                # Read the announced length whether or not a reporthook was
                # supplied; otherwise the truncation check below is skipped
                # for callers that pass no hook.
                if 'content-length' in headers:
                    size = int(headers['Content-Length'])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)

            finally:
                tfp.close()

        finally:
            fp.close()

        if size >= 0 and read < size:
            raise ContentTooShortError('retrieval incomplete: got only %i out of %i bytes' % (read, size), result)
        return result
Esempio n. 4
0
    def open_local_file(self, url):
        """Return an ``addinfourl`` for the local file that ``url`` names.

        The URL is split into host and path; the path is mapped to a local
        filename which is stat'ed and opened in binary mode. The response
        headers (Content-Type, Content-Length, Last-modified) are built from
        the guessed MIME type and the stat result.

        Raises IOError when the file cannot be stat'ed, when a port is given,
        or when the host does not resolve to one of the addresses returned by
        ``localhost()`` / ``thishost()``.
        """
        import mimetypes
        import mimetools
        import email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO

        host, path = splithost(url)
        localname = url2pathname(path)
        try:
            stats = os.stat(localname)
        except OSError as e:
            # Report the stat failure as IOError, keeping its details.
            raise IOError(e.errno, e.strerror, e.filename)

        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(
            StringIO(
                'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified)))

        if host:
            host, port = splitport(host)
            is_local = (not port and
                        socket.gethostbyname(host) in (localhost(),
                                                       thishost()))
            if not is_local:
                # Call form of raise: valid on Python 2 and Python 3 alike,
                # unlike the old "raise IOError, (...)" statement.
                raise IOError('local file error', 'not on local host')

        # Absolute paths are reported back as file:// URLs.
        urlfile = path
        if path[:1] == '/':
            urlfile = 'file://' + path
        return addinfourl(open(localname, 'rb'), headers, urlfile)
Esempio n. 5
0
 def OnLinkClicked(self, linkinfo):
     """Route a clicked link to the index, a file, or a document section.

     The link's href is split on ``'.html'``. Two pieces are passed to
     ``self.loadDoc`` as module name and anchor. A single piece is treated
     as: ``'.'`` -> ``self.index()``; a ``file:`` URL -> ``self.open`` with
     the converted path; anything else -> an anchor in
     ``self.moduleName``. Any other number of pieces is ignored.
     """
     parts = linkinfo.GetHref().split('.html')
     count = len(parts)

     if count == 2:
         target_module, target_anchor = parts
         self.loadDoc(target_module, target_anchor)
         return
     if count != 1:
         return

     target = parts[0]
     if target == '.':
         self.index()
     elif target.startswith('file:'):
         # Drop the "file:" prefix before converting the URL to a path.
         self.open(nturl2path.url2pathname(target[5:]))
     else:
         self.loadDoc(self.moduleName, target)
Esempio n. 6
0
 def OnLinkClicked(self, linkinfo):
     """Handle a click on a link inside the viewer.

     The href is split on ``'.html'``:

     * one piece: ``'.'`` shows the index, a piece starting with ``file:``
       is converted to a path and passed to ``self.open``, and anything
       else is loaded as an anchor of ``self.moduleName``;
     * two pieces: they are passed to ``self.loadDoc`` as module name and
       anchor.

     Hrefs that split into more pieces are ignored.
     """
     pieces = linkinfo.GetHref().split('.html')
     piece_count = len(pieces)

     if piece_count == 1:
         link = pieces[0]
         if link == '.':
             self.index()
             return
         if link[:5] == 'file:':
             local_path = nturl2path.url2pathname(link[5:])
             self.open(local_path)
             return
         self.loadDoc(self.moduleName, link)
     elif piece_count == 2:
         self.loadDoc(pieces[0], pieces[1])
Esempio n. 7
0
import nturl2path

# Round-trip a Windows path: path -> URL form -> path.
file = r"c:\my\little\pony"

as_url = nturl2path.pathname2url(file)
print(as_url)
print(nturl2path.url2pathname(as_url))

# Output recorded with the original example (the exact drive formatting may
# differ between Python versions -- TODO confirm on the target version):
## ///C|/my/little/pony
## C:\my\little\pony
Esempio n. 8
0
import nturl2path

# Round-trip a Windows path: path -> URL form -> path.
file = r"c:\my\little\pony"

# print(...) with a single argument prints the same text on Python 2 and
# Python 3, unlike the Python-2-only print statement.
print(nturl2path.pathname2url(file))
print(nturl2path.url2pathname(nturl2path.pathname2url(file)))

# Output recorded with the original example (the exact drive formatting may
# differ between Python versions -- TODO confirm on the target version):
## ///C|/my/little/pony
## C:\my\little\pony
Esempio n. 9
0
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """Download ``url`` to a local file; return ``(filename, headers)``.

        A result stored in ``self.tempcache`` for the URL is returned
        directly. With no ``filename`` and a URL that has no scheme or the
        ``file`` scheme, the local file itself is reported without copying.
        Otherwise the content is copied in 8192-byte blocks to ``filename``,
        or to a new temporary file whose suffix comes from the URL path.

        ``reporthook``, when given, is called with
        ``(block number, block size, total size)`` before the first read and
        after every block; the total size is -1 when no Content-Length
        header is present. ``data`` is passed on to ``self.open``.

        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]

        scheme, rest = splittype(url)
        if filename is None and (not scheme or scheme == 'file'):
            try:
                local = self.open_local_file(rest)
                local_headers = local.info()
                local.close()
                return (url2pathname(splithost(rest)[1]), local_headers)
            except IOError:
                # Not usable as a local file; fall through to self.open().
                pass

        source = self.open(url, data)
        try:
            headers = source.info()
            if filename:
                sink = open(filename, 'wb')
            else:
                import tempfile
                # Reduce the URL to its bare path so that only the path's
                # extension is used as the temp-file suffix.
                path = splittype(url)[1]
                path = splithost(path or '')[1]
                path = splitquery(path or '')[0]
                path = splitattr(path or '')[0]
                extension = os.path.splitext(path)[1]
                handle, filename = tempfile.mkstemp(extension)
                self.__tempfiles.append(filename)
                sink = os.fdopen(handle, 'wb')
            try:
                result = (filename, headers)
                if self.tempcache is not None:
                    self.tempcache[url] = result
                block_size = 8192
                size = -1
                received = 0
                block_index = 0
                if 'content-length' in headers:
                    size = int(headers['Content-Length'])
                if reporthook:
                    reporthook(block_index, block_size, size)
                while True:
                    chunk = source.read(block_size)
                    if chunk == '':
                        break
                    received += len(chunk)
                    sink.write(chunk)
                    block_index += 1
                    if reporthook:
                        reporthook(block_index, block_size, size)
            finally:
                sink.close()
        finally:
            source.close()

        if size >= 0 and received < size:
            message = ('retrieval incomplete: got only %i out of %i bytes' %
                       (received, size))
            raise ContentTooShortError(message, result)
        return result
Esempio n. 10
0
 def update_event(self, inp=-1):
     """Convert the URL on input 0 to a path and write it to output 0.

     The conversion uses ``nturl2path.url2pathname``. ``inp`` is accepted
     but not used in this method.
     """
     url = self.input(0)
     local_path = nturl2path.url2pathname(url)
     self.set_output_val(0, local_path)