def canonicalize_url(url):
    """Percent-quote a URL that contains whitespace.

    URLs without any whitespace are returned untouched. Otherwise the
    path, params, query and fragment components are each passed through
    ``quote`` and the URL is reassembled.
    """
    if re.search(r'\s+', url) is None:
        return url
    # mechanize does not handle quoting automatically, so do it here
    components = list(urlparse(url))
    components[2:6] = [quote(piece) for piece in components[2:6]]
    return urlunparse(components)
def fetch_url(self, url):
    """Fetch ``url`` and return its contents wrapped in a ``response``.

    The returned object has a ``newurl`` attribute: ``'file:' + path`` for
    local files, or whatever ``geturl()`` reports for remote fetches.

    ``file:`` / ``file://`` URLs are read straight from disk and skip the
    inter-request delay. All other URLs are opened through ``self.browser``
    after waiting so that at least ``self.delay`` seconds separate fetches.

    Raises ``FetchError`` when a ``URLError`` carries a ``code`` present in
    ``responses``. A ``URLError`` that looks like a connection reset or a
    name-resolution failure is retried once after one second; any other
    ``URLError`` is re-raised.
    """
    data = None
    self.log.debug('Fetching', url)
    st = time.time()

    # Check for a URL pointing to the local filesystem and special case it
    # for efficiency and robustness. Bypasses delay checking as it does not
    # apply to local fetches. Ensures that unicode paths that are not
    # representable in the filesystem_encoding work.
    # is_local holds the length of the scheme prefix to strip (0 = remote).
    is_local = 0
    if url.startswith('file://'):
        is_local = 7
    elif url.startswith('file:'):
        is_local = 5
    if is_local > 0:
        url = url[is_local:]
        # Drop the leading slash on Windows so the rest is used as the path.
        if iswindows and url.startswith('/'):
            url = url[1:]
        with open(url, 'rb') as f:
            data = response(f.read())
            # This is what mechanize does for local URLs
            data.newurl = 'file:'+url
        self.log.debug('Fetched %s in %.1f seconds' % (url, time.time() - st))
        return data

    # Sleep for whatever remains of the configured delay since the last fetch.
    delta = time.time() - self.last_fetch_at
    if delta < self.delay:
        time.sleep(self.delay - delta)
    # mechanize does not handle quoting automatically
    # NOTE(review): the URL is encoded to bytes before urlparse()/quote().
    # If quote() returns text for bytes input on the target interpreter,
    # urlunparse() would receive mixed types -- confirm this path on Python 3.
    if re.search(r'\s+', url) is not None:
        if isinstance(url, unicode_type):
            url = url.encode('utf-8')
        purl = list(urlparse(url))
        for i in range(2, 6):
            purl[i] = quote(purl[i])
        url = urlunparse(purl).decode('utf-8')
    # Prefer open_novisit when the browser provides it, else fall back to open.
    open_func = getattr(self.browser, 'open_novisit', self.browser.open)
    try:
        with closing(open_func(url, timeout=self.timeout)) as f:
            # NOTE(review): read() is called twice and the results joined;
            # presumably a workaround for a short first read -- confirm.
            data = response(f.read()+f.read())
            data.newurl = f.geturl()
    except URLError as err:
        if hasattr(err, 'code') and err.code in responses:
            raise FetchError(responses[err.code])
        if getattr(err, 'reason', [0])[0] == 104 or \
                getattr(getattr(err, 'args', [None])[0], 'errno', None) in (-2, -3):
            # Connection reset by peer or Name or service not known
            self.log.debug('Temporary error, retrying in 1 second')
            time.sleep(1)
            # Single retry; an exception raised here propagates to the caller.
            with closing(open_func(url, timeout=self.timeout)) as f:
                data = response(f.read()+f.read())
                data.newurl = f.geturl()
        else:
            raise err
    finally:
        # Recorded on success and on failure so the delay logic stays correct.
        self.last_fetch_at = time.time()
    self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
    return data
def canonicalize_url(url):
    """Percent-quote a URL that contains whitespace.

    URLs without whitespace are returned unchanged. Otherwise a text URL is
    first encoded as UTF-8, its path, params, query and fragment components
    are quoted and converted with ``as_bytes``, and the reassembled URL is
    decoded from UTF-8 before being returned.
    """
    if re.search(r'\s+', url) is None:
        return url
    # mechanize does not handle quoting automatically, so do it here
    if isinstance(url, unicode_type):
        url = url.encode('utf-8')
    components = list(urlparse(url))
    components[2:6] = [as_bytes(quote(piece)) for piece in components[2:6]]
    return urlunparse(components).decode('utf-8')
def localize_website_link(url):
    """Return ``url`` with the current language code prefixed to its path.

    The URL is returned unchanged when the language code is ``'en'`` or is
    not among the values returned by ``website_languages()``.
    """
    lang_code = lang_as_iso639_1(get_lang())
    available = website_languages()
    if lang_code == 'en' or lang_code not in available:
        return url
    from polyglot.urllib import urlparse, urlunparse
    pieces = list(urlparse(url))
    # Index 2 of the parsed URL is the path component.
    pieces[2] = '/{}{}'.format(lang_code, pieces[2])
    return urlunparse(pieces)
def url_to_local_path(cls, url, base):
    """Convert a parsed URL into a filesystem path.

    ``url`` must expose ``path``, ``params`` and ``query`` attributes. The
    path, params and query are recombined (scheme, netloc and fragment are
    dropped) and unquoted. On Windows a leading ``/`` is stripped and the
    result is treated as absolute. Relative results are resolved against
    ``base`` and made absolute.
    """
    raw_path = url.path
    rooted_on_windows = False
    if iswindows and raw_path.startswith('/'):
        raw_path, rooted_on_windows = raw_path[1:], True
    recombined = urlunparse(('', '', raw_path, url.params, url.query, ''))
    local_path = urlunquote(recombined)
    if not (rooted_on_windows or os.path.isabs(local_path)):
        local_path = os.path.abspath(os.path.join(base, local_path))
    return local_path
def localize_user_manual_link(url):
    """Return ``url`` rewritten to point at the localized user manual.

    When ``lang_code_for_user_manual()`` returns a falsy value the URL is
    returned unchanged. Otherwise every ``/generated/<letters>/`` segment in
    the path is switched to the language code and the code is also prefixed
    to the path.
    """
    lang_code = lang_code_for_user_manual()
    if not lang_code:
        return url
    from polyglot.urllib import urlparse, urlunparse
    pieces = list(urlparse(url))
    # Index 2 of the parsed URL is the path component.
    localized = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lang_code, pieces[2] or '')
    pieces[2] = '/%s%s' % (lang_code, localized)
    return urlunparse(pieces)
def localize_user_manual_link(url):
    """Return ``url`` rewritten to point at the localized user manual.

    The URL is returned unchanged when the language code is ``'en'`` or when
    the value stored for that code in ``user_manual_stats()`` is below 0.3
    (missing codes count as 0). Otherwise every ``/generated/<letters>/``
    segment in the path is switched to the language code and the code is
    also prefixed to the path.
    """
    lang_code = lang_as_iso639_1(get_lang())
    # The stats lookup only happens for non-English codes (short-circuit).
    if lang_code == 'en' or user_manual_stats().get(lang_code, 0) < 0.3:
        return url
    from polyglot.urllib import urlparse, urlunparse
    pieces = list(urlparse(url))
    # Index 2 of the parsed URL is the path component.
    localized = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lang_code, pieces[2] or '')
    pieces[2] = '/%s%s' % (lang_code, localized)
    return urlunparse(pieces)
def url(self):
    """Rebuild the URL with the current macro values in its query string.

    For every ``(macro, name)`` pair in ``self.macro_map``: if this object
    has an attribute called ``macro``, query parameter ``name`` is set to
    that attribute's value; otherwise the parameter is removed. The stored
    ``self.query_string`` and ``self.url_parts`` are not modified.

    Returns the recomposed URL string.
    """
    # Shallow copy so the original query string mapping is left untouched.
    query_string = dict(self.query_string)

    for macro, name in self.macro_map.items():
        if hasattr(self, macro):
            # Values are lists because the query is encoded with doseq.
            query_string[name] = [getattr(self, macro)]
        else:
            # pop() instead of del: an unset macro whose parameter is
            # already absent must not raise KeyError.
            query_string.pop(name, None)

    # Index 4 of the URL parts is the query component.
    url_parts = list(self.url_parts)
    url_parts[4] = urlencode(query_string, doseq=True)
    return urlunparse(tuple(url_parts))
def url(self):
    """Rebuild the URL with the current macro values in its query string.

    For every ``(macro, name)`` pair in ``self.macro_map``: if this object
    has an attribute called ``macro``, query parameter ``name`` is set to
    that attribute's value; otherwise the parameter is removed. The stored
    ``self.query_string`` and ``self.url_parts`` are not modified.

    Returns the recomposed URL string.
    """
    # Shallow copy so the original query string mapping is left untouched.
    query_string = dict(self.query_string)

    for macro, name in self.macro_map.items():
        if hasattr(self, macro):
            # Values are lists because the query is encoded with doseq.
            query_string[name] = [getattr(self, macro)]
        else:
            # pop() instead of del: an unset macro whose parameter is
            # already absent must not raise KeyError.
            query_string.pop(name, None)

    # Index 4 of the URL parts is the query component.
    url_parts = list(self.url_parts)
    url_parts[4] = urlencode(query_string, doseq=True)
    return urlunparse(tuple(url_parts))
def __call__(self, url):
    """Rewrite the fragment of ``url`` using ``self.id_map``.

    A pure ``#fragment`` link is looked up in the map for ``self.base``.
    Any other link is resolved to a name through
    ``self.container.href_to_name`` and its fragment is looked up in the map
    for that name. When no mapping applies the URL is returned unchanged.
    ``self.replaced`` is set to True whenever the returned link differs from
    the input.
    """
    if url and url.startswith('#'):
        anchor = url[1:]
        new_anchor = self.id_map.get(self.base, {}).get(anchor)
        if new_anchor is None or new_anchor == anchor:
            return url
        self.replaced = True
        return '#' + new_anchor

    target_name = self.container.href_to_name(url, self.base)
    if not target_name:
        return url
    target_ids = self.id_map.get(target_name)
    if target_ids is None:
        return url

    parsed = urlparse(url)
    new_fragment = target_ids.get(parsed.fragment)
    if new_fragment is None:
        return url
    rewritten = urlunparse(parsed._replace(fragment=new_fragment))
    if rewritten != url:
        self.replaced = True
    return rewritten
def __init__(self, opts):
    """Set up access to a local or a remote (http/https) library.

    The library location comes from ``opts.library_path``, falling back to
    ``prefs['library_path']``; ``SystemExit`` is raised when neither is set.

    For an http(s) location the URL fragment becomes ``self.library_id``, a
    browser is created, and credentials from ``read_credentials(opts)`` are
    registered when both are present. A library id of ``'-'`` lists the
    libraries and exits.

    For any other location the path is user-expanded, and ``SystemExit`` is
    raised when ``singleinstance('db')`` returns a falsy value.
    """
    self.library_path = opts.library_path or prefs['library_path']
    self.timeout = opts.timeout
    self.url = None
    if self.library_path is None:
        raise SystemExit(
            'No saved library path, either run the GUI or use the'
            ' --with-library option')

    scheme = self.library_path.partition(':')[0]
    if scheme not in ('http', 'https'):
        # Local library on disk.
        self.library_path = os.path.expanduser(self.library_path)
        if not singleinstance('db'):
            server_exe = 'calibre-server' + ('.exe' if iswindows else '')
            message = _(
                'Another calibre program such as {} or the main calibre program is running.'
                ' Having multiple programs that can make changes to a calibre library'
                ' running at the same time is a bad idea. calibredb can connect directly'
                ' to a running calibre Content server, to make changes through it, instead.'
                ' See the documentation of the {} option for details.'
            ).format(server_exe, '--with-library')
            raise SystemExit(message)
        self._db = None
        self.is_remote = False
        return

    # Remote library: the fragment names the library, the rest is the URL.
    parsed = urlparse(self.library_path)
    self.library_id = parsed.fragment or None
    self.url = urlunparse(parsed._replace(fragment='')).rstrip('/')
    agent = '{} {}'.format(__appname__, __version__)
    self.br = browser(handle_refresh=False, user_agent=agent)
    self.is_remote = True
    user, secret = read_credentials(opts)
    self.has_credentials = False
    if user and secret:
        self.br.add_password(self.url, user, secret)
        self.has_credentials = True
    if self.library_id == '-':
        self.list_libraries()
        raise SystemExit()
def __init__(self, opts):
    """Set up access to a local or a remote (http/https) library.

    The library location comes from ``opts.library_path``, falling back to
    ``prefs['library_path']``; ``SystemExit`` is raised when neither is set.

    For an http(s) location the URL fragment becomes ``self.library_id``, a
    browser is created, and credentials from ``read_credentials(opts)`` are
    registered when both are present. A library id of ``'-'`` lists the
    libraries and exits.

    For any other location the path is user-expanded, and ``SystemExit`` is
    raised when ``singleinstance('db')`` returns a falsy value.
    """
    self.library_path = opts.library_path or prefs['library_path']
    self.url = None
    if self.library_path is None:
        raise SystemExit(
            'No saved library path, either run the GUI or use the'
            ' --with-library option'
        )

    scheme = self.library_path.partition(':')[0]
    if scheme not in ('http', 'https'):
        # Local library on disk.
        self.library_path = os.path.expanduser(self.library_path)
        if not singleinstance('db'):
            server_exe = 'calibre-server' + ('.exe' if iswindows else '')
            message = _(
                'Another calibre program such as {} or the main calibre program is running.'
                ' Having multiple programs that can make changes to a calibre library'
                ' running at the same time is a bad idea. calibredb can connect directly'
                ' to a running calibre content server, to make changes through it, instead.'
                ' See the documentation of the {} option for details.'
            ).format(server_exe, '--with-library')
            raise SystemExit(message)
        self._db = None
        self.is_remote = False
        return

    # Remote library: the fragment names the library, the rest is the URL.
    parsed = urlparse(self.library_path)
    self.library_id = parsed.fragment or None
    self.url = urlunparse(parsed._replace(fragment='')).rstrip('/')
    agent = '{} {}'.format(__appname__, __version__)
    self.br = browser(handle_refresh=False, user_agent=agent)
    self.is_remote = True
    user, secret = read_credentials(opts)
    self.has_credentials = False
    if user and secret:
        self.br.add_password(self.url, user, secret)
        self.has_credentials = True
    if self.library_id == '-':
        self.list_libraries()
        raise SystemExit()