Exemple #1
def canonicalize_url(url):
    """Percent-quote the components of a URL that contains whitespace.

    mechanize does not quote URLs automatically, so when *url* contains any
    whitespace its path, params, query and fragment (items 2-5 of the parsed
    URL) are each passed through ``quote`` and the URL is reassembled.
    A URL without whitespace is returned unchanged.
    """
    if re.search(r'\s+', url) is None:
        return url
    components = list(urlparse(url))
    # Scheme and netloc (items 0 and 1) are deliberately left untouched.
    components[2:6] = [quote(piece) for piece in components[2:6]]
    return urlunparse(components)
Exemple #2
    def fetch_url(self, url):
        """Fetch *url* and return its contents wrapped in a ``response``.

        ``file:`` URLs are read directly from the local filesystem and skip
        the inter-request delay. Any other URL is opened through
        ``self.browser`` after sleeping long enough that ``self.delay``
        seconds have passed since ``self.last_fetch_at``.

        The returned object has a ``newurl`` attribute holding the URL the
        data was finally read from.

        Raises ``FetchError`` when the ``URLError`` carries a ``code`` listed
        in ``responses``. A connection reset or name-resolution failure is
        retried once after one second; every other ``URLError`` propagates.
        """
        data = None
        self.log.debug('Fetching', url)
        st = time.time()

        # Check for a URL pointing to the local filesystem and special case it
        # for efficiency and robustness. Bypasses delay checking as it does not
        # apply to local fetches. Ensures that unicode paths that are not
        # representable in the filesystem_encoding work.
        is_local = 0
        if url.startswith('file://'):
            is_local = 7
        elif url.startswith('file:'):
            is_local = 5
        if is_local > 0:
            url = url[is_local:]
            if iswindows and url.startswith('/'):
                url = url[1:]
            with open(url, 'rb') as f:
                data = response(f.read())
                data.newurl = 'file:'+url  # This is what mechanize does for
                # local URLs
            self.log.debug('Fetched %s in %.1f seconds' % (url, time.time() - st))
            return data

        delta = time.time() - self.last_fetch_at
        if delta < self.delay:
            time.sleep(self.delay - delta)
        # mechanize does not handle quoting automatically
        if re.search(r'\s+', url) is not None:
            if isinstance(url, unicode_type):
                url = url.encode('utf-8')
            purl = list(urlparse(url))
            for i in range(2, 6):
                quoted = quote(purl[i])
                # quote() can hand back text even for bytes input; keep every
                # component as bytes so urlunparse() does not see mixed types.
                if not isinstance(quoted, bytes):
                    quoted = quoted.encode('ascii')
                purl[i] = quoted
            url = urlunparse(purl).decode('utf-8')
        open_func = getattr(self.browser, 'open_novisit', self.browser.open)

        def read_remote():
            # NOTE(review): read() is called twice and the results are
            # concatenated; presumably a workaround for a short first read --
            # confirm before simplifying.
            with closing(open_func(url, timeout=self.timeout)) as f:
                fetched = response(f.read()+f.read())
                fetched.newurl = f.geturl()
            return fetched

        try:
            data = read_remote()
        except URLError as err:
            if hasattr(err, 'code') and err.code in responses:
                raise FetchError(responses[err.code])
            if getattr(err, 'reason', [0])[0] == 104 or \
                getattr(getattr(err, 'args', [None])[0], 'errno', None) in (-2,
                        -3):  # Connection reset by peer or Name or service not known
                self.log.debug('Temporary error, retrying in 1 second')
                time.sleep(1)
                data = read_remote()
            else:
                # Not a known temporary failure: let the caller see it.
                raise
        finally:
            # Record the attempt time whether or not the fetch succeeded so
            # the delay between requests is always honoured.
            self.last_fetch_at = time.time()
        self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
        return data
Exemple #3
def canonicalize_url(url):
    """Percent-quote the components of a URL that contains whitespace.

    mechanize does not quote URLs automatically. When *url* contains
    whitespace, a text URL is first encoded as UTF-8, then the path, params,
    query and fragment (items 2-5 of the parsed URL) are quoted and passed
    through ``as_bytes``, and the reassembled URL is decoded from UTF-8.
    A URL without whitespace is returned unchanged.
    """
    if re.search(r'\s+', url) is None:
        return url
    raw = url.encode('utf-8') if isinstance(url, unicode_type) else url
    pieces = list(urlparse(raw))
    # Scheme and netloc (items 0 and 1) are deliberately left untouched.
    for idx in (2, 3, 4, 5):
        pieces[idx] = as_bytes(quote(pieces[idx]))
    return urlunparse(pieces).decode('utf-8')
Exemple #4
def localize_website_link(url):
    """Return *url* with the current language code prefixed to its path.

    The language code comes from ``lang_as_iso639_1(get_lang())``. The URL
    is returned unchanged when that code is ``'en'`` or is not among
    ``website_languages()``.
    """
    lang_code = lang_as_iso639_1(get_lang())
    available = website_languages()
    if lang_code == 'en' or lang_code not in available:
        return url
    from polyglot.urllib import urlparse, urlunparse
    scheme, netloc, path, params, query, fragment = urlparse(url)
    localized_path = '/{}{}'.format(lang_code, path)
    return urlunparse([scheme, netloc, localized_path, params, query, fragment])
Exemple #5
 def url_to_local_path(cls, url, base):
     """Convert a parsed URL into an absolute local filesystem path.

     *url* must expose ``path``, ``params`` and ``query`` attributes. The
     scheme, netloc and fragment are dropped, and the rest is reassembled
     and unquoted. On Windows a leading ``/`` is stripped and the result is
     treated as absolute. Paths that are not absolute are resolved
     against *base*.
     """
     local = url.path
     # Short-circuits exactly like the original: startswith() is only
     # evaluated when running on Windows.
     drive_rooted = iswindows and local.startswith('/')
     if drive_rooted:
         local = local[1:]
     local = urlunquote(urlunparse(('', '', local, url.params, url.query, '')))
     if drive_rooted or os.path.isabs(local):
         return local
     return os.path.abspath(os.path.join(base, local))
Exemple #6
 def url_to_local_path(cls, url, base):
     """Map a parsed URL onto an absolute path on the local filesystem.

     Only the ``path``, ``params`` and ``query`` attributes of *url* are
     used; they are recombined with ``urlunparse`` and then unquoted. On
     Windows a leading ``/`` is removed and the path is returned as-is.
     Otherwise an already absolute path is returned directly and a relative
     one is joined onto *base* and made absolute.
     """
     raw_path = url.path
     strip_root = bool(iswindows and raw_path.startswith('/'))
     trimmed = raw_path[1:] if strip_root else raw_path
     recombined = urlunparse(('', '', trimmed, url.params, url.query, ''))
     result = urlunquote(recombined)
     if not (strip_root or os.path.isabs(result)):
         result = os.path.abspath(os.path.join(base, result))
     return result
Exemple #7
def localize_website_link(url):
    """Prefix the path of *url* with the current two-letter language code.

    Nothing is changed when the code returned by
    ``lang_as_iso639_1(get_lang())`` is ``'en'`` or is missing from
    ``website_languages()``.
    """
    code = lang_as_iso639_1(get_lang())
    supported = website_languages()
    needs_prefix = code != 'en' and code in supported
    if not needs_prefix:
        return url
    from polyglot.urllib import urlparse, urlunparse
    pieces = list(urlparse(url))
    # Item 2 of the parsed URL is the path.
    pieces[2] = '/{}{}'.format(code, pieces[2])
    return urlunparse(pieces)
Exemple #8
def localize_user_manual_link(url):
    """Rewrite a user manual URL to point at the localized manual.

    When ``lang_code_for_user_manual()`` returns a falsy value the URL is
    returned unchanged. Otherwise any ``/generated/<letters>/`` segment in
    the path is switched to that language code and the code is also
    prefixed to the path.
    """
    lang_code = lang_code_for_user_manual()
    if not lang_code:
        return url
    from polyglot.urllib import urlparse, urlunparse
    scheme, netloc, path, params, query, fragment = urlparse(url)
    path = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lang_code,
                  path or '')
    localized_path = '/%s%s' % (lang_code, path)
    return urlunparse([scheme, netloc, localized_path, params, query, fragment])
Exemple #9
def localize_user_manual_link(url):
    """Rewrite a user manual URL to point at the localized manual.

    The URL is returned unchanged when the current language code is ``'en'``
    or when ``user_manual_stats()`` reports a value below 0.3 for it
    (a missing entry counts as 0). Otherwise any ``/generated/<letters>/``
    segment in the path is switched to the language code and the code is
    also prefixed to the path.
    """
    lang_code = lang_as_iso639_1(get_lang())
    # The stats are only looked up for non-English languages.
    if lang_code == 'en' or user_manual_stats().get(lang_code, 0) < 0.3:
        return url
    from polyglot.urllib import urlparse, urlunparse
    scheme, netloc, path, params, query, fragment = urlparse(url)
    path = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lang_code, path or '')
    localized_path = '/%s%s' % (lang_code, path)
    return urlunparse([scheme, netloc, localized_path, params, query, fragment])
Exemple #10
def localize_user_manual_link(url):
    """Point a user manual URL at the manual for the current language.

    English, and languages whose entry in ``user_manual_stats()`` is below
    0.3 (or absent), keep the original URL. For every other language the
    ``/generated/<letters>/`` path segment is rewritten to use the language
    code, and the code is added as the first path component.
    """
    code = lang_as_iso639_1(get_lang())
    if code == 'en':
        return url
    coverage = user_manual_stats().get(code, 0)
    if coverage < 0.3:
        return url
    from polyglot.urllib import urlparse, urlunparse
    pieces = list(urlparse(url))
    # Item 2 of the parsed URL is the path; it may be empty.
    swapped = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % code, pieces[2] or '')
    pieces[2] = '/%s%s' % (code, swapped)
    return urlunparse(pieces)
Exemple #11
    def url(self):
        """Rebuild the URL with macro values substituted into the query string.

        For every ``macro -> name`` pair in ``self.macro_map``: when this
        object has an attribute called *macro*, the query parameter *name*
        is set to that attribute's value; otherwise the parameter is
        removed from the query string. The remaining parts of
        ``self.url_parts`` are reused as they are.

        Returns the recomposed URL string. ``self.query_string`` itself is
        not modified.
        """
        # Work on a copy so the stored query string keeps its original keys.
        query_string = dict(self.query_string)

        for macro, name in self.macro_map.items():
            if hasattr(self, macro):
                # set the name/value pair
                query_string[name] = [getattr(self, macro)]
            else:
                # remove the name/value pair; tolerate it already being absent
                query_string.pop(name, None)

        # Item 4 of the URL parts is the query; doseq=1 expands list values.
        url_parts = list(self.url_parts)
        url_parts[4] = urlencode(query_string, 1)

        return urlunparse(tuple(url_parts))
Exemple #12
    def url(self):
        """Rebuild the URL with macro values substituted into the query string.

        For every ``macro -> name`` pair in ``self.macro_map`` whose *macro*
        is an attribute of this object, the query parameter *name* is set to
        that attribute's value. Parameters for macros without a matching
        attribute are left as they are. Returns the recomposed URL string.
        """
        # Start from a copy of the original query string.
        params = dict(self.query_string)
        params.update({
            name: [getattr(self, macro)]
            for macro, name in self.macro_map.items()
            if hasattr(self, macro)
        })

        # Item 4 of the URL parts is the query; doseq=1 expands list values.
        pieces = list(self.url_parts)
        pieces[4] = urlencode(params, 1)
        return urlunparse(tuple(pieces))
Exemple #13
 def __call__(self, url):
     """Return *url* with its fragment replaced according to ``self.id_map``.

     A pure ``#fragment`` link is looked up in the map for ``self.base``.
     Any other link is resolved to a name with
     ``self.container.href_to_name`` and its fragment is looked up in that
     name's map. ``self.replaced`` is set to True whenever the returned
     link differs from the input; otherwise *url* is returned unchanged.
     """
     if url and url.startswith('#'):
         anchor = url[1:]
         renamed = self.id_map.get(self.base, {}).get(anchor)
         if renamed is not None and renamed != anchor:
             self.replaced = True
             return '#' + renamed
         return url
     target = self.container.href_to_name(url, self.base)
     target_ids = self.id_map.get(target) if target else None
     if target_ids is None:
         return url
     parsed = urlparse(url)
     new_fragment = target_ids.get(parsed.fragment)
     if new_fragment is None:
         return url
     rewritten = urlunparse(parsed._replace(fragment=new_fragment))
     if rewritten != url:
         self.replaced = True
     return rewritten
Exemple #14
 def __call__(self, url):
     """Map the fragment of *url* to its new id, if ``self.id_map`` has one.

     Links of the form ``#fragment`` use the id map stored under
     ``self.base``. Other links are first turned into a name by
     ``self.container.href_to_name`` and use the id map stored under that
     name. When the link changes, ``self.replaced`` is set to True and the
     new link is returned; in every other case the original *url* is
     returned.
     """
     if url and url.startswith('#'):
         old_id = url[1:]
         new_id = self.id_map.get(self.base, {}).get(old_id)
         unchanged = new_id is None or new_id == old_id
         if unchanged:
             return url
         self.replaced = True
         return '#' + new_id

     name = self.container.href_to_name(url, self.base)
     if name:
         ids_for_name = self.id_map.get(name)
         if ids_for_name is not None:
             parts = urlparse(url)
             fragment = ids_for_name.get(parts.fragment)
             if fragment is not None:
                 href = urlunparse(parts._replace(fragment=fragment))
                 if href != url:
                     self.replaced = True
                 return href
     return url
Exemple #15
 def __init__(self, opts):
     """Set up access to either a remote or a local library.

     The library location is ``opts.library_path``, falling back to
     ``prefs['library_path']``. A location whose scheme is ``http`` or
     ``https`` is treated as remote: a browser is created, credentials from
     ``read_credentials(opts)`` are registered when both are present, and
     the URL fragment is stored as ``library_id``. Any other location is a
     local path, which requires that this be the only running instance.

     Raises ``SystemExit`` when no library path is available, when the
     remote library id is ``'-'``, or when another instance already holds
     the ``'db'`` single-instance lock.
     """
     self.library_path = opts.library_path or prefs['library_path']
     self.timeout = opts.timeout
     self.url = None
     if self.library_path is None:
         raise SystemExit(
             'No saved library path, either run the GUI or use the'
             ' --with-library option')
     if self.library_path.partition(':')[0] in ('http', 'https'):
         parts = urlparse(self.library_path)
         # The fragment names the library; the URL proper excludes it.
         self.library_id = parts.fragment or None
         self.url = urlunparse(parts._replace(fragment='')).rstrip('/')
         self.br = browser(handle_refresh=False,
                           user_agent='{} {}'.format(
                               __appname__, __version__))
         self.is_remote = True
         username, password = read_credentials(opts)
         self.has_credentials = False
         if username and password:
             self.br.add_password(self.url, username, password)
             self.has_credentials = True
         # NOTE(review): a library id of '-' exits without a message;
         # presumably a "list libraries" request -- confirm.
         if self.library_id == '-':
             raise SystemExit()
     else:
         # Local library: the steps below must not run for a remote URL,
         # otherwise is_remote would be reset to False.
         self.library_path = os.path.expanduser(self.library_path)
         if not singleinstance('db'):
             ext = '.exe' if iswindows else ''
             raise SystemExit(
                 _('Another calibre program such as {} or the main calibre program is running.'
                   ' Having multiple programs that can make changes to a calibre library'
                   ' running at the same time is a bad idea. calibredb can connect directly'
                   ' to a running calibre Content server, to make changes through it, instead.'
                   ' See the documentation of the {} option for details.').
                 format('calibre-server' + ext, '--with-library'))
         self._db = None
         self.is_remote = False
Exemple #16
 def __init__(self, opts):
     """Set up access to either a remote or a local library.

     The library location is ``opts.library_path``, falling back to
     ``prefs['library_path']``. A location whose scheme is ``http`` or
     ``https`` is treated as remote: a browser is created, credentials from
     ``read_credentials(opts)`` are registered when both are present, and
     the URL fragment is stored as ``library_id``. Any other location is a
     local path, which requires that this be the only running instance.

     Raises ``SystemExit`` when no library path is available, when the
     remote library id is ``'-'``, or when another instance already holds
     the ``'db'`` single-instance lock.
     """
     self.library_path = opts.library_path or prefs['library_path']
     self.url = None
     if self.library_path is None:
         raise SystemExit(
             'No saved library path, either run the GUI or use the'
             ' --with-library option'
         )
     if self.library_path.partition(':')[0] in ('http', 'https'):
         parts = urlparse(self.library_path)
         # The fragment names the library; the URL proper excludes it.
         self.library_id = parts.fragment or None
         self.url = urlunparse(parts._replace(fragment='')).rstrip('/')
         self.br = browser(handle_refresh=False, user_agent='{} {}'.format(__appname__, __version__))
         self.is_remote = True
         username, password = read_credentials(opts)
         self.has_credentials = False
         if username and password:
             self.br.add_password(self.url, username, password)
             self.has_credentials = True
         # NOTE(review): a library id of '-' exits without a message;
         # presumably a "list libraries" request -- confirm.
         if self.library_id == '-':
             raise SystemExit()
     else:
         # Local library: the steps below must not run for a remote URL,
         # otherwise is_remote would be reset to False.
         self.library_path = os.path.expanduser(self.library_path)
         if not singleinstance('db'):
             ext = '.exe' if iswindows else ''
             raise SystemExit(_(
                 'Another calibre program such as {} or the main calibre program is running.'
                 ' Having multiple programs that can make changes to a calibre library'
                 ' running at the same time is a bad idea. calibredb can connect directly'
                 ' to a running calibre content server, to make changes through it, instead.'
                 ' See the documentation of the {} option for details.'
             ).format('calibre-server' + ext, '--with-library'))
         self._db = None
         self.is_remote = False