コード例 #1
0
def save_soup(soup, target):
    '''
    Write `soup` to the file `target` as UTF-8 encoded markup.

    Before writing, the tree is modified in place:

    * Every ``<meta>`` whose ``content`` attribute mentions ``charset`` is
      replaced by a UTF-8 ``Content-Type`` declaration. If none was replaced,
      the declaration is inserted as the first child of ``<head>`` (when the
      document has a ``<head>``).
    * HTML5 style ``<meta charset="...">`` tags are removed, so that no stale
      declaration contradicts the bytes actually written.
    * ``src``/``href`` attributes of ``<img>``, ``<link>`` and ``<a>`` that
      hold the absolute path of an existing file are rewritten using
      ``relpath`` against the directory containing `target`, with ``/`` as
      the separator.

    `soup`: The parsed document. It is mutated.
    `target`: Path of the file to (over)write.
    '''
    ns = BeautifulSoup(
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'
    )
    nm = ns.find('meta')
    added = False
    for meta in soup.findAll('meta', content=True):
        if 'charset' in meta.get('content', '').lower():
            meta.replaceWith(nm)
            added = True
    # <meta charset="..."> carries no content attribute, so the loop above
    # never sees it. Drop such tags: the output is always UTF-8 and a
    # leftover declaration could name a different encoding.
    for meta in soup.findAll('meta', charset=True):
        meta.extract()
    if not added:
        head = soup.find('head')
        if head is not None:
            head.insert(0, nm)

    selfdir = os.path.dirname(target)

    for tag in soup.findAll(['img', 'link', 'a']):
        for key in ('src', 'href'):
            path = tag.get(key, None)
            # isabs() is a pure string test, so run it before hitting the
            # filesystem; isfile() already implies that the path exists.
            if path and os.path.isabs(path) and os.path.isfile(path):
                tag[key] = unicode_path(
                    relpath(path, selfdir).replace(os.sep, '/'))

    html = unicode_type(soup)
    with open(target, 'wb') as f:
        f.write(html.encode('utf-8'))
コード例 #2
0
    def href(self, basedir=None):
        '''
        Build the URL that points to this resource.

        A resource without a filesystem path returns its stored href
        unchanged. Otherwise the result is the URL-quoted path of the
        resource relative to `basedir`, using ``/`` as the separator and
        followed by ``#fragment`` when a fragment is set.

        `basedir`: Directory the returned URL is relative to. If None, the
        basedir of this resource is used (see :method:`set_basedir`); if the
        resource has no basedir either, the current working directory is
        used.
        '''
        from urllib import quote
        if basedir is None:
            basedir = self._basedir or os.getcwdu()
        if self.path is None:
            return self._href

        anchor = ''
        if self.fragment:
            raw = self.fragment
            if isinstance(raw, unicode):
                # Hand quote() UTF-8 bytes rather than a unicode object.
                raw = raw.encode('utf-8')
            anchor = '#' + quote(raw)

        if self.path == basedir:
            # The resource is the base directory itself: only the fragment
            # (possibly empty) remains.
            return anchor

        try:
            relative = relpath(self.path, basedir)
        except OSError:  # On windows path and basedir could be on different drives
            relative = self.path
        if isinstance(relative, unicode):
            relative = relative.encode('utf-8')
        return quote(relative.replace(os.sep, '/')) + anchor
コード例 #3
0
ファイル: simple.py プロジェクト: Coi-l/calibre
def save_soup(soup, target):
    '''
    Write `soup` to the file `target` as UTF-8 encoded markup.

    Before writing, the tree is modified in place:

    * Every ``<meta>`` whose ``content`` attribute mentions ``charset`` is
      replaced by a UTF-8 ``Content-Type`` declaration. If none was replaced,
      the declaration is inserted as the first child of ``<head>`` (when the
      document has a ``<head>``).
    * HTML5 style ``<meta charset="...">`` tags are removed, so that no stale
      declaration contradicts the bytes actually written.
    * ``src``/``href`` attributes of ``<img>``, ``<link>`` and ``<a>`` that
      hold the absolute path of an existing file are rewritten using
      ``relpath`` against the directory containing `target`, with ``/`` as
      the separator.

    `soup`: The parsed document. It is mutated.
    `target`: Path of the file to (over)write.
    '''
    ns = BeautifulSoup('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
    nm = ns.find('meta')
    added = False
    for meta in soup.findAll('meta', content=True):
        if 'charset' in meta.get('content', '').lower():
            meta.replaceWith(nm)
            added = True
    # <meta charset="..."> carries no content attribute, so the loop above
    # never sees it. Drop such tags: the output is always UTF-8 and a
    # leftover declaration could name a different encoding.
    for meta in soup.findAll('meta', charset=True):
        meta.extract()
    if not added:
        head = soup.find('head')
        if head is not None:
            head.insert(0, nm)

    selfdir = os.path.dirname(target)

    for tag in soup.findAll(['img', 'link', 'a']):
        for key in ('src', 'href'):
            path = tag.get(key, None)
            # isabs() is a pure string test, so run it before hitting the
            # filesystem; isfile() already implies that the path exists.
            if path and os.path.isabs(path) and os.path.isfile(path):
                tag[key] = unicode_path(relpath(path, selfdir).replace(os.sep, '/'))

    html = unicode(soup)
    with open(target, 'wb') as f:
        f.write(html.encode('utf-8'))
コード例 #4
0
def save_soup(soup, target):
    '''
    Serialize `soup` to the file `target` as UTF-8.

    The tree is modified in place before it is written:

    * ``<meta>`` tags whose ``content`` mentions ``charset`` and ``<meta>``
      tags with a ``charset`` attribute are removed.
    * If the document has a ``<head>``, a new ``<meta charset="utf-8">`` is
      inserted as its first child.
    * ``src``/``href`` attributes of ``<img>``, ``<link>`` and ``<a>`` that
      hold the absolute path of an existing file are rewritten using
      ``relpath`` against the directory containing `target`, with ``/`` as
      the separator.

    `soup`: The parsed document. It is mutated.
    `target`: Path of the file to (over)write.
    '''
    # Discard any previous encoding declaration, old style first.
    declared = [
        m for m in soup.findAll('meta', content=True)
        if 'charset' in m['content'].lower()
    ]
    for stale in declared:
        stale.extract()
    for stale in soup.findAll('meta', charset=True):
        stale.extract()

    head_tag = soup.find('head')
    if head_tag is not None:
        head_tag.insert(0, soup.new_tag('meta', charset='utf-8'))

    base_dir = os.path.dirname(target)
    for element in soup.findAll(['img', 'link', 'a']):
        for attr in ('src', 'href'):
            value = element.get(attr, None)
            if not value:
                continue
            is_local_file = (
                os.path.isfile(value)
                and os.path.exists(value)
                and os.path.isabs(value)
            )
            if is_local_file:
                rel = relpath(value, base_dir)
                element[attr] = unicode_path(rel.replace(os.sep, '/'))

    markup = str(soup)
    with open(target, 'wb') as out:
        out.write(markup.encode('utf-8'))
コード例 #5
0
ファイル: simple.py プロジェクト: JimmXinu/calibre
def save_soup(soup, target):
    '''
    Serialize `soup` to the file `target` as UTF-8.

    The tree is modified in place before it is written:

    * ``<meta>`` tags whose ``content`` mentions ``charset`` and ``<meta>``
      tags with a ``charset`` attribute are removed.
    * If the document has a ``<head>``, a new ``<meta charset="utf-8">`` is
      inserted as its first child.
    * ``src``/``href`` attributes of ``<img>``, ``<link>`` and ``<a>`` that
      hold the absolute path of an existing file are rewritten using
      ``relpath`` against the directory containing `target`, with ``/`` as
      the separator.

    `soup`: The parsed document. It is mutated.
    `target`: Path of the file to (over)write.
    '''
    # Discard any previous encoding declaration, old style first.
    declared = [
        m for m in soup.findAll('meta', content=True)
        if 'charset' in m['content'].lower()
    ]
    for stale in declared:
        stale.extract()
    for stale in soup.findAll('meta', charset=True):
        stale.extract()

    head_tag = soup.find('head')
    if head_tag is not None:
        head_tag.insert(0, soup.new_tag('meta', charset='utf-8'))

    base_dir = os.path.dirname(target)
    for element in soup.findAll(['img', 'link', 'a']):
        for attr in ('src', 'href'):
            value = element.get(attr, None)
            if not value:
                continue
            is_local_file = (
                os.path.isfile(value)
                and os.path.exists(value)
                and os.path.isabs(value)
            )
            if is_local_file:
                rel = relpath(value, base_dir)
                element[attr] = unicode_path(rel.replace(os.sep, '/'))

    markup = unicode_type(soup)
    with open(target, 'wb') as out:
        out.write(markup.encode('utf-8'))