def href(self, basedir=None):
    '''
    Return a URL pointing to this resource. If it is a file on the
    filesystem the URL is relative to `basedir`.

    `basedir`: If None, the basedir of this resource is used (see
    :method:`set_basedir`). If this resource has no basedir, then the
    current working directory is used as the basedir.
    '''
    if basedir is None:
        basedir = self._basedir if self._basedir else os.getcwdu()
    if self.path is None:
        # Not a filesystem resource, return the stored href verbatim
        return self._href
    if self.fragment:
        raw = self.fragment
        if isinstance(raw, unicode_type):
            raw = raw.encode('utf-8')
        frag = '#' + quote(raw)
    else:
        frag = ''
    if self.path == basedir:
        return frag
    try:
        rel = relpath(self.path, basedir)
    except OSError:
        # On windows path and basedir could be on different drives
        rel = self.path
    if isinstance(rel, unicode_type):
        rel = rel.encode('utf-8')
    return quote(rel.replace(os.sep, '/')) + frag
def href(self, basedir=None):
    '''
    Return a URL pointing to this resource. If it is a file on the
    filesystem the URL is relative to `basedir`.

    `basedir`: If None, the basedir of this resource is used (see
    :method:`set_basedir`). If this resource has no basedir, then the
    current working directory is used as the basedir.
    '''
    if basedir is None:
        if self._basedir:
            basedir = self._basedir
        else:
            basedir = os.getcwdu()
    if self.path is None:
        # No filesystem path: fall back to the raw href
        return self._href
    frag = ''
    if self.fragment:
        f = self.fragment
        if isinstance(f, unicode_type):
            f = f.encode('utf-8')
        frag = '#' + quote(f)
    if self.path == basedir:
        return frag
    try:
        rpath = relpath(self.path, basedir)
    except OSError:
        # On windows path and basedir could be on different drives
        rpath = self.path
    if isinstance(rpath, unicode_type):
        rpath = rpath.encode('utf-8')
    return quote(rpath.replace(os.sep, '/')) + frag
def link_replacer(base, url):
    # Rewrite an intra-book link so it points at the named resource;
    # records any file whose links changed in the `changed` set.
    if url.startswith('#'):
        # Pure fragment link: re-target it at the base document itself
        frag = urlunquote(url[1:])
        if not frag:
            return url
        changed.add(base)
        return resource_template.format(encode_url(base, frag))
    purl = urlparse(url)
    # External, absolute, or query-bearing URLs are left untouched
    not_rewritable = (
        purl.netloc or purl.query or
        (purl.scheme and purl.scheme != 'file') or
        not purl.path or purl.path.startswith('/')
    )
    if not_rewritable:
        return url
    url, frag = purl.path, purl.fragment
    name = container.href_to_name(url, base)
    if name:
        if container.has_name_and_is_not_empty(name):
            frag = urlunquote(frag)
            url = resource_template.format(encode_url(name, frag))
        else:
            # Target file is absent/empty: mark the link as missing
            if isinstance(name, unicode_type):
                name = name.encode('utf-8')
            url = 'missing:' + force_unicode(quote(name), 'utf-8')
        changed.add(base)
    return url
def link_replacer(base, url):
    # Rewrite an intra-book link relative to `base`; adds `base` to the
    # `changed` set whenever the link text is altered.
    if url.startswith('#'):
        frag = urlunquote(url[1:])
        if not frag:
            return url
        # Fragment-only links are re-pointed at the base document
        changed.add(base)
        return resource_template.format(encode_url(base, frag))
    purl = urlparse(url)
    if purl.netloc or purl.query:
        return url  # external or query-bearing: leave alone
    if purl.scheme and purl.scheme != 'file':
        return url  # non-file scheme: leave alone
    if not purl.path or purl.path.startswith('/'):
        return url  # empty or absolute path: leave alone
    url, frag = purl.path, purl.fragment
    name = self.href_to_name(url, base)
    if name:
        if self.has_name_and_is_not_empty(name):
            frag = urlunquote(frag)
            url = resource_template.format(encode_url(name, frag))
        else:
            # Broken link: flag it with the missing: pseudo-scheme
            if isinstance(name, unicode_type):
                name = name.encode('utf-8')
            url = 'missing:' + force_unicode(quote(name), 'utf-8')
        changed.add(base)
    return url
def qquote(val, use_plus=True):
    '''
    Percent-quote *val* for use in a URL, returning a unicode string.

    *val* may be text (encoded to UTF-8 first) or bytes. When *use_plus*
    is True, spaces become ``+`` (quote_plus); otherwise they become
    ``%20`` (quote).
    '''
    if not isinstance(val, bytes):
        val = val.encode('utf-8')
    quoter = quote_plus if use_plus else quote
    ans = quoter(val)
    # On Python 2 the quoters return bytes; normalize to text
    return ans.decode('utf-8') if isinstance(ans, bytes) else ans
def resource_adder(self, link_, base=None):
    '''
    Resolve the link *link_* (relative to *base*) to a local file, add that
    file to the OEB manifest if it is readable and not already added, and
    return the rewritten href (with any fragment re-attached). Returns the
    original *link_* unchanged when the target cannot be used, and None for
    links to plain text files.
    '''
    from polyglot.urllib import quote
    link, frag = self.link_to_local_path(link_, base=base)
    if link is None:
        return link_
    try:
        if base and not os.path.isabs(link):
            link = os.path.join(base, link)
        link = os.path.abspath(link)
    except:
        # Path manipulation failed (e.g. undecodable path); keep original link
        return link_
    if not os.access(link, os.R_OK):
        return link_
    if os.path.isdir(link):
        self.log.warn(link_, 'is a link to a directory. Ignoring.')
        return link_
    if not self.is_case_sensitive(tempfile.gettempdir()):
        # Case-insensitive filesystem: normalize so duplicates are detected
        link = link.lower()
    if link not in self.added_resources:
        bhref = os.path.basename(link)
        id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
        guessed = self.guess_type(href)[0]
        media_type = guessed or self.BINARY_MIME
        if media_type == 'text/plain':
            self.log.warn('Ignoring link to text file %r' % link_)
            return None
        if media_type == self.BINARY_MIME:
            # Check for the common case, images
            try:
                img = what(link)
            except EnvironmentError:
                pass
            else:
                if img:
                    media_type = self.guess_type('dummy.' + img)[0] or self.BINARY_MIME
        self.oeb.log.debug('Added', link)
        self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.log, ignore_opf=True)
        # Load into memory
        item = self.oeb.manifest.add(id, href, media_type)
        # bhref refers to an already existing file. The read() method of
        # DirContainer will call unquote on it before trying to read the
        # file, therefore we quote it here.
        if isinstance(bhref, unicode_type):
            bhref = bhref.encode('utf-8')
        item.html_input_href = as_unicode(quote(bhref))
        if guessed in self.OEB_STYLES:
            item.override_css_fetch = partial(self.css_import_handler, os.path.dirname(link))
        # Accessing item.data forces the resource to be loaded into memory
        item.data
        self.added_resources[link] = href
    nlink = self.added_resources[link]
    if frag:
        nlink = '#'.join((nlink, frag))
    return nlink
def canonicalize_url(url):
    '''
    Percent-quote the path/params/query/fragment components of *url* when
    it contains whitespace. mechanize does not handle quoting
    automatically, so this must be done before handing it the URL.
    '''
    if re.search(r'\s+', url) is None:
        return url
    parts = list(urlparse(url))
    # Indices 2-5 are path, params, query and fragment; scheme and
    # netloc are left untouched.
    for idx in (2, 3, 4, 5):
        parts[idx] = quote(parts[idx])
    return urlunparse(parts)
def fetch_url(self, url):
    '''
    Fetch *url* and return its content wrapped in a response object (with a
    ``newurl`` attribute holding the final URL). Local ``file:`` URLs are
    read directly from disk; remote URLs go through self.browser with
    rate-limiting (self.delay) and a single retry on transient network
    errors. Raises FetchError for HTTP error codes.
    '''
    data = None
    self.log.debug('Fetching', url)
    st = time.time()
    # Check for a URL pointing to the local filesystem and special case it
    # for efficiency and robustness. Bypasses delay checking as it does not
    # apply to local fetches. Ensures that unicode paths that are not
    # representable in the filesystem_encoding work.
    is_local = 0
    if url.startswith('file://'):
        is_local = 7
    elif url.startswith('file:'):
        is_local = 5
    if is_local > 0:
        # Strip the file:// prefix (is_local is the prefix length)
        url = url[is_local:]
        if iswindows and url.startswith('/'):
            url = url[1:]
        with open(url, 'rb') as f:
            data = response(f.read())
            data.newurl = 'file:' + url  # This is what mechanize does for local URLs
        self.log.debug('Fetched %s in %.1f seconds' % (url, time.time() - st))
        return data
    # Rate-limit remote fetches to at most one every self.delay seconds
    delta = time.time() - self.last_fetch_at
    if delta < self.delay:
        time.sleep(self.delay - delta)
    # mechanize does not handle quoting automatically
    if re.search(r'\s+', url) is not None:
        if isinstance(url, unicode_type):
            url = url.encode('utf-8')
        purl = list(urlparse(url))
        # Quote path, params, query and fragment (indices 2-5)
        for i in range(2, 6):
            purl[i] = quote(purl[i])
        url = urlunparse(purl).decode('utf-8')
    open_func = getattr(self.browser, 'open_novisit', self.browser.open)
    try:
        with closing(open_func(url, timeout=self.timeout)) as f:
            # Second read() returns b'' but ensures the stream is drained
            data = response(f.read() + f.read())
            data.newurl = f.geturl()
    except URLError as err:
        if hasattr(err, 'code') and err.code in responses:
            raise FetchError(responses[err.code])
        if getattr(err, 'reason', [0])[0] == 104 or getattr(getattr(err, 'args', [None])[0], 'errno', None) in (-2, -3):
            # Connection reset by peer or Name or service not known
            self.log.debug('Temporary error, retrying in 1 second')
            time.sleep(1)
            with closing(open_func(url, timeout=self.timeout)) as f:
                data = response(f.read() + f.read())
                data.newurl = f.geturl()
        else:
            raise err
    finally:
        # Record fetch time even on failure so rate-limiting stays accurate
        self.last_fetch_at = time.time()
    self.log.debug('Fetched %s in %f seconds' % (url, time.time() - st))
    return data
def resource_adder(self, link_, base=None):
    '''
    Resolve the link *link_* (relative to *base*) to a local file, add that
    file to the OEB manifest if it is readable and not already added, and
    return the rewritten href (with any fragment re-attached). Returns the
    original *link_* unchanged when the target cannot be used, and None for
    links to plain text files.
    '''
    from polyglot.urllib import quote
    link, frag = self.link_to_local_path(link_, base=base)
    if link is None:
        return link_
    try:
        if base and not os.path.isabs(link):
            link = os.path.join(base, link)
        link = os.path.abspath(link)
    except:
        # Path manipulation failed (e.g. undecodable path); keep original link
        return link_
    if not os.access(link, os.R_OK):
        return link_
    if os.path.isdir(link):
        self.log.warn(link_, 'is a link to a directory. Ignoring.')
        return link_
    if not self.is_case_sensitive(tempfile.gettempdir()):
        # Case-insensitive filesystem: normalize so duplicates are detected
        link = link.lower()
    if link not in self.added_resources:
        bhref = os.path.basename(link)
        id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
        guessed = self.guess_type(href)[0]
        media_type = guessed or self.BINARY_MIME
        if media_type == 'text/plain':
            self.log.warn('Ignoring link to text file %r'%link_)
            return None
        if media_type == self.BINARY_MIME:
            # Check for the common case, images
            try:
                img = what(link)
            except EnvironmentError:
                pass
            else:
                if img:
                    media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME
        self.oeb.log.debug('Added', link)
        self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.log, ignore_opf=True)
        # Load into memory
        item = self.oeb.manifest.add(id, href, media_type)
        # bhref refers to an already existing file. The read() method of
        # DirContainer will call unquote on it before trying to read the
        # file, therefore we quote it here.
        if isinstance(bhref, unicode_type):
            bhref = bhref.encode('utf-8')
        item.html_input_href = unicode_type(quote(bhref))
        if guessed in self.OEB_STYLES:
            item.override_css_fetch = partial(self.css_import_handler, os.path.dirname(link))
        # Accessing item.data forces the resource to be loaded into memory
        item.data
        self.added_resources[link] = href
    nlink = self.added_resources[link]
    if frag:
        nlink = '#'.join((nlink, frag))
    return nlink
def canonicalize_url(url):
    '''
    Percent-quote the path/params/query/fragment components of *url* when
    it contains whitespace. mechanize does not handle quoting
    automatically, so this must be done before handing it the URL.
    '''
    if re.search(r'\s+', url) is None:
        return url
    if isinstance(url, unicode_type):
        url = url.encode('utf-8')
    parts = list(urlparse(url))
    # Indices 2-5 are path, params, query and fragment; quote() may
    # return text, so coerce back to bytes for urlunparse.
    for idx in (2, 3, 4, 5):
        parts[idx] = as_bytes(quote(parts[idx]))
    return urlunparse(parts).decode('utf-8')
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Build a download filename of the form "title - authors_id.ext" for the
    book *book_id*. Kobo Touch clients get kepub files renamed to
    kepub.epub. With *as_encoded_unicode* the name is UTF-8
    percent-encoded per RFC 6266; otherwise it is ASCII-sanitized.
    '''
    author_part = authors_to_string(mi.authors or [_('Unknown')])
    title_part = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    if ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
        ext = 'kepub.epub'
    fname = '%s - %s_%s.%s' % (title_part[:30], author_part[:30], book_id, ext)
    if as_encoded_unicode:
        # See https://tools.ietf.org/html/rfc6266
        return quote(sanitize_file_name_unicode(fname).encode('utf-8')).decode('ascii')
    return ascii_filename(fname).replace('"', '_')
def icon_map():
    '''
    Return the lazily-built mapping of tag browser categories to icon
    names, merging user-configured custom icons over the defaults.
    Thread-safe via _icon_map_lock; the map is built once and cached in
    the module global _icon_map.
    '''
    global _icon_map
    with _icon_map_lock:
        if _icon_map is None:
            from calibre.gui2 import gprefs
            _icon_map = category_icon_map.copy()
            # Overlay user-chosen icons, but only if the icon file is readable
            for category, icon_file in iteritems(gprefs.get('tags_browser_category_icons', {})):
                if os.access(os.path.join(config_dir, 'tb_icons', icon_file), os.R_OK):
                    _icon_map[category] = '_' + quote(icon_file)
            _icon_map['file_type_icons'] = {
                ext: 'mimetypes/%s.png' % icon for ext, icon in iteritems(EXT_MAP)
            }
        return _icon_map
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Build a download filename of the form "title - authors_id.ext" for the
    book *book_id*. Kobo Touch clients get kepub files renamed to
    kepub.epub. With *as_encoded_unicode* the name is UTF-8
    percent-encoded per RFC 6266; otherwise it is ASCII-sanitized.
    '''
    authors = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    ua = rd.inheaders.get('User-Agent', '')
    if ext == 'kepub' and 'Kobo Touch' in ua:
        ext = 'kepub.epub'
    fname = '%s - %s_%s.%s' % (title[:30], authors[:30], book_id, ext)
    if as_encoded_unicode:
        # See https://tools.ietf.org/html/rfc6266
        return unicode_type(quote(sanitize_file_name(fname).encode('utf-8')))
    return ascii_filename(fname).replace('"', '_')
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Build a download filename of the form "title - authors_id.ext" for the
    book *book_id*. With *as_encoded_unicode* the name is UTF-8
    percent-encoded per RFC 6266; otherwise it is ASCII-sanitized. Kobo
    Touch clients asking for kepub get '!' replaced and '.epub' appended.
    '''
    authors = authors_to_string(mi.authors or [_('Unknown')])[:30]
    title = (mi.title or _('Unknown'))[:30]
    ext = (fmt or '').lower()
    fname = f'{title} - {authors}_{book_id}.{ext}'
    if as_encoded_unicode:
        # See https://tools.ietf.org/html/rfc6266
        fname = str(quote(sanitize_file_name(fname).encode('utf-8')))
    else:
        fname = ascii_filename(fname).replace('"', '_')
    if ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
        fname = fname.replace('!', '_') + '.epub'
    return fname
def a(filename, data=None, status=OK, method='POST', username='******', add_duplicates='n', job_id=1):
    # Helper: POST a book file to the /cdb/add-book endpoint and assert
    # the expected HTTP status, returning the response payload.
    endpoint = '/cdb/add-book/{}/{}/{}'.format(
        job_id, add_duplicates, quote(filename.encode('utf-8')))
    r, data = make_request(
        conn, endpoint, username=username, password='******',
        prefix='', method=method, data=data)
    ae(status, r.status)
    return data
def uniquote(raw):
    '''
    Percent-quote *raw* and return the result as text. Text input is
    encoded to UTF-8 before quoting; bytes are quoted as-is.
    '''
    payload = raw.encode('utf-8') if isinstance(raw, str) else raw
    return str(quote(payload))
def urls_from_identifiers(identifiers):  # {{{
    '''
    Build a list of (name, id_type, id_value, url) tuples for the given
    identifiers mapping. URLs come from user-configured id_link_rules,
    metadata source plugins, well-known schemes (isbn, doi, arxiv, oclc,
    issn), and identifier values that are themselves URLs.
    '''
    identifiers = {k.lower(): v for k, v in iteritems(identifiers)}
    ans = []
    # Track identifiers not yet turned into a URL, for the final fallback pass
    keys_left = set(identifiers)

    def add(name, k, val, url):
        ans.append((name, k, val, url))
        keys_left.discard(k)
    # User-configured link templates, rendered with the quoted id value
    rules = msprefs['id_link_rules']
    if rules:
        formatter = EvalFormatter()
        for k, val in iteritems(identifiers):
            # '|' is used as a separator in stored values; restore ','
            val = val.replace('|', ',')
            vals = {'id': unicode_type(quote(val if isinstance(val, bytes) else val.encode('utf-8')))}
            items = rules.get(k) or ()
            for name, template in items:
                try:
                    url = formatter.safe_format(template, vals, '', vals)
                except Exception:
                    import traceback
                    traceback.format_exc()
                    continue
                add(name, k, val, url)
    # URLs contributed by metadata source plugins (best-effort)
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                add(plugin.get_book_url_name(id_type, id_val, url), id_type, id_val, url)
        except Exception:
            pass
    # Well-known identifier schemes with fixed URL patterns
    isbn = identifiers.get('isbn', None)
    if isbn:
        add(isbn, 'isbn', isbn, 'https://www.worldcat.org/isbn/' + isbn)
    doi = identifiers.get('doi', None)
    if doi:
        add('DOI', 'doi', doi, 'https://dx.doi.org/' + doi)
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        add('arXiv', 'arxiv', arxiv, 'https://arxiv.org/abs/' + arxiv)
    oclc = identifiers.get('oclc', None)
    if oclc:
        add('OCLC', 'oclc', oclc, 'https://www.worldcat.org/oclc/' + oclc)
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        add(issn, 'issn', issn, 'https://www.worldcat.org/issn/' + issn)
    q = {'http', 'https', 'file'}
    # Identifiers explicitly named url/uri (optionally numbered) whose value is a URL
    for k, url in iteritems(identifiers):
        if url and re.match(r'ur[il]\d*$', k) is not None:
            # Stored form escapes ':' in the scheme and ',' elsewhere with '|'
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            if url.partition(':')[0].lower() in q:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                add(name, k, url, url)
    # Fallback: any remaining identifier whose value looks like a URL
    for k in tuple(keys_left):
        val = identifiers.get(k)
        if val:
            url = val[:8].replace('|', ':') + val[8:].replace('|', ',')
            if url.partition(':')[0].lower() in q:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                add(name, k, url, url)
    return ans
def uniquote(raw):
    '''
    Percent-quote *raw* and return the result as text. Unicode input is
    encoded to UTF-8 before quoting; bytes are quoted as-is.
    '''
    payload = raw.encode('utf-8') if isinstance(raw, unicode_type) else raw
    return unicode_type(quote(payload))
def a(filename, data=None, status=OK, method='POST', username='******', add_duplicates='n', job_id=1):
    # Helper: submit a book file via /cdb/add-book and assert the expected
    # HTTP status; returns the response payload.
    path = '/cdb/add-book/{}/{}/{}'.format(
        job_id, add_duplicates, quote(filename.encode('utf-8')))
    r, data = make_request(
        conn, path, username=username, password='******', prefix='',
        method=method, data=data)
    ae(status, r.status)
    return data