def _url2path(self, url):
    """Convert a server-side URL into a client-side path."""
    path = urlunquote(urlparse(url).path)
    root = urlunquote(self._url_p.path)
    path = path[len(root) - 1:].decode("utf8")
    while path.endswith("/"):
        path = path[:-1]
    return path
def urldecode(qs):
    r = []
    for pair in qs.replace(';', '&').split('&'):
        if not pair:
            continue
        nv = pair.split('=', 1)
        if len(nv) != 2:
            nv.append('')
        key = urlunquote(nv[0].replace('+', ' '), encoding='utf-8')
        value = urlunquote(nv[1].replace('+', ' '), encoding='utf-8')
        r.append((key, value))
    return r
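# Usage sketch (added), assuming urlunquote is urllib.parse.unquote;
# urldecode() above splits on both '&' and ';' and decodes '+' as space:
from urllib.parse import unquote as urlunquote

assert urldecode('a=1&b=hello+world;c=%C3%A4') == [
    ('a', '1'), ('b', 'hello world'), ('c', 'ä')]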
def _parse_qsl(qs):
    r = []
    for pair in qs.replace(';', '&').split('&'):
        if not pair:
            continue
        nv = pair.split('=', 1)
        if len(nv) != 2:
            nv.append('')
        key = urlunquote(nv[0].replace('+', ' '))
        value = urlunquote(nv[1].replace('+', ' '))
        r.append((key, value))
    return r
def escape_uri(uri, illegal_only=True, safe="/"):  # pragma: no cover
    if not uri:
        return uri
    if illegal_only:
        return requote_uri(uri)
    else:
        urlparts = urlsplit(uri)
        path = urlquote(urlunquote(urlparts.path), safe=safe)
        query = urlquote(urlunquote(urlparts.query), safe=safe)
        fragment = urlquote(urlunquote(urlparts.fragment), safe=safe)
        return urlunsplit(
            (urlparts.scheme, urlparts.netloc, path, query, fragment))
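# Usage sketch (added): with illegal_only=False, escape_uri() unquotes
# then re-quotes each component, so already-escaped input is not
# double-escaped and the call is idempotent. Assumes the url* aliases
# come from urllib.parse; requote_uri (requests.utils) is only needed
# by the default illegal_only=True branch.
from urllib.parse import quote as urlquote, unquote as urlunquote
from urllib.parse import urlsplit, urlunsplit

once = escape_uri('http://example.com/a b/c', illegal_only=False)
twice = escape_uri(once, illegal_only=False)
assert once == twice == 'http://example.com/a%20b/c'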
def split_url(self, url):
    """Parse an IIIF API URL path into components.

    Will parse a URL or URL path that accords with either the
    parametrized or info API forms. Will raise an IIIFRequestError on
    failure. If self.identifier is set then url is assumed not to
    include the identifier.
    """
    # clear data first
    identifier = self.identifier
    self.clear()
    # url must start with baseurl if set (including slash)
    if (self.baseurl is not None):
        (path, num) = re.subn('^' + self.baseurl, '', url, 1)
        if (num != 1):
            raise IIIFRequestError(
                text="Request URL does not start with base URL")
        url = path
    # Break up by path segments, count to decide format
    segs = url.split('/')
    if (identifier is not None):
        segs.insert(0, identifier)
    elif (self.allow_slashes_in_identifier):
        segs = self._allow_slashes_in_identifier_munger(segs)
    # Now have segments with identifier as first
    if (len(segs) > 5):
        raise IIIFRequestPathError(
            text="Request URL (%s) has too many path segments" % url)
    elif (len(segs) == 5):
        self.identifier = urlunquote(segs[0])
        self.region = urlunquote(segs[1])
        self.size = urlunquote(segs[2])
        self.rotation = urlunquote(segs[3])
        self.quality = self.strip_format(urlunquote(segs[4]))
        self.info = False
    elif (len(segs) == 2):
        self.identifier = urlunquote(segs[0])
        info_name = self.strip_format(urlunquote(segs[1]))
        if (info_name != "info"):
            raise IIIFRequestError(text="Bad name for Image Information")
        if (self.api_version == '1.0'):
            if (self.format not in ['json', 'xml']):
                raise IIIFRequestError(
                    text="Invalid format for Image Information (json and xml allowed)")
        elif (self.format != 'json'):
            raise IIIFRequestError(
                text="Invalid format for Image Information (only json allowed)")
        self.info = True
    elif (len(segs) == 1):
        self.identifier = urlunquote(segs[0])
        raise IIIFRequestBaseURI()
    else:
        raise IIIFRequestPathError(
            text="Bad number of path segments in request")
    return (self)
def _suggest_filename(self, content_disposition):
    default_name = re.sub(r'[\\:/]', '_', urlparse(self.filename).path)

    # See https://tools.ietf.org/html/rfc6266#section-4.1
    matches = re.findall(r"filename\*?=(?:\"|.{0,10}?'[^']*')([^\"]+)",
                         content_disposition or '')

    return urlunquote(matches[-1]) if matches else default_name
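# Usage sketch (added): the RFC 6266 regex above matches both plain
# 'filename="..."' and extended "filename*=UTF-8''..." parameters;
# urlunquote is assumed to be urllib.parse.unquote.
import re
from urllib.parse import unquote as urlunquote

cd = "attachment; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf"
matches = re.findall(r"filename\*?=(?:\"|.{0,10}?'[^']*')([^\"]+)", cd)
assert urlunquote(matches[-1]) == 'résumé.pdf'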
def test_specialchars(self, monkeypatch, requires_collections,
                      get_storage_args, get_item):
    if getattr(self, 'dav_server', '') == 'radicale':
        pytest.xfail('Radicale is fundamentally broken.')

    monkeypatch.setattr('vdirsyncer.utils.generate_href', lambda x: x)

    uid = u'test @ foo ät bar град сатану'
    collection = 'test @ foo ät bar'
    s = self.storage_class(**get_storage_args(collection=collection))
    item = get_item(uid=uid)

    href, etag = s.upload(item)
    item2, etag2 = s.get(href)
    if etag is not None:
        assert etag2 == etag
    assert_item_equals(item2, item)

    (_, etag3), = s.list()
    assert etag2 == etag3

    assert collection in urlunquote(s.collection)
    if self.storage_class.storage_name.endswith('dav'):
        assert urlquote(uid, '/@:') in href
def path_to_url(self, url):
    scheme, netloc, path, params, query, fragment = urlparse(url)

    if (scheme or netloc or not path or url.startswith('/')
            or AMP_SUBSTITUTE in url or '.' not in os.path.split(path)[-1]):
        # Ignore URLs unless they are a relative link to a source file.
        # AMP_SUBSTITUTE is used internally by Markdown only for email.
        # No '.' in the last part of a path indicates path does not point to a file.
        return url

    # Determine the filepath of the target.
    target_path = os.path.join(os.path.dirname(self.file.src_path), urlunquote(path))
    target_path = os.path.normpath(target_path).lstrip(os.sep)

    # Validate that the target exists in files collection.
    if target_path not in self.files:
        log.warning(
            "Documentation file '{}' contains a link to '{}' which is not found "
            "in the documentation files.".format(self.file.src_path, target_path))
        return url
    target_file = self.files.get_file_from_path(target_path)
    path = target_file.url_relative_to(self.file)
    components = (scheme, netloc, path, params, query, fragment)
    return urlunparse(components)
def test_specialchars(self, monkeypatch, requires_collections,
                      get_storage_args, get_item):
    if getattr(self, 'dav_server', '') == 'radicale':
        pytest.xfail('Radicale is fundamentally broken.')

    monkeypatch.setattr('vdirsyncer.utils.generate_href', lambda x: x)

    uid = u'test @ foo ät bar град сатану'
    collection = 'test @ foo ät bar'
    s = self.storage_class(**get_storage_args(collection=collection))
    item = get_item(uid=uid)

    href, etag = s.upload(item)
    item2, etag2 = s.get(href)
    assert etag2 == etag
    assert_item_equals(item2, item)

    (href2, etag2), = s.list()
    assert etag2 == etag
    # https://github.com/owncloud/contacts/issues/581
    assert href2.replace('%2B', '%20') == href

    item2, etag2 = s.get(href)
    assert etag2 == etag
    assert_item_equals(item2, item)

    assert collection in urlunquote(s.collection)
    if self.storage_class.storage_name.endswith('dav'):
        assert urlquote(uid, '/@:') in href
def test_specialchars(self, monkeypatch, requires_collections,
                      get_storage_args, get_item):
    if getattr(self, 'dav_server', '') == 'radicale':
        pytest.skip('Radicale is fundamentally broken.')
    if getattr(self, 'dav_server', '') in ('icloud', 'fastmail'):
        pytest.skip('iCloud and FastMail reject this name.')

    monkeypatch.setattr('vdirsyncer.utils.generate_href', lambda x: x)

    uid = u'test @ foo ät bar град сатану'
    collection = 'test @ foo ät bar'
    s = self.storage_class(**get_storage_args(collection=collection))
    item = get_item(uid=uid)

    href, etag = s.upload(item)
    item2, etag2 = s.get(href)
    if etag is not None:
        assert etag2 == etag
    assert_item_equals(item2, item)

    (_, etag3), = s.list()
    assert etag2 == etag3

    # etesync uses UUIDs for collection names
    if self.storage_class.storage_name.startswith('etesync'):
        return

    assert collection in urlunquote(s.collection)
    if self.storage_class.storage_name.endswith('dav'):
        assert urlquote(uid, '/@:') in href
def url_unquote(self, quoted):
    """returns a unicode unquoted string

    decoding is based on `self.encoding` which is the encoding used in
    `url_quote`
    """
    return urlunquote(quoted)
def get_url_straight_filename(url, strip=None, allowdir=False):
    """Get file/dir name of the last path component of the URL

    Parameters
    ----------
    strip: list, optional
      If provided, listed names will not be considered and their
      parent directory will be selected
    allowdir: bool, optional
      If url points to a "directory" (ends with /), empty string
      would be returned unless allowdir is True, in which case the
      name of the directory would be returned
    """
    path = urlunquote(urlsplit(url).path)
    path_parts = path.split('/')

    if allowdir:
        # strip empty ones
        while len(path_parts) > 1 and not path_parts[-1]:
            path_parts = path_parts[:-1]

    if strip:
        while path_parts and path_parts[-1] in strip:
            path_parts = path_parts[:-1]

    if path_parts:
        return path_parts[-1]
    else:
        return None
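# Usage sketch (added), assuming urlunquote and urlsplit are
# urllib.parse.unquote and urllib.parse.urlsplit:
from urllib.parse import unquote as urlunquote, urlsplit

assert get_url_straight_filename('http://a.com/d/f%20ile.txt') == 'f ile.txt'
assert get_url_straight_filename('http://a.com/d/') == ''  # trailing slash
assert get_url_straight_filename('http://a.com/d/', allowdir=True) == 'd'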
def test_specialchars(self, monkeypatch, requires_collections,
                      get_storage_args, get_item):
    if getattr(self, "dav_server", "") == "radicale":
        pytest.skip("Radicale is fundamentally broken.")
    if getattr(self, "dav_server", "") in ("icloud", "fastmail"):
        pytest.skip("iCloud and FastMail reject this name.")

    monkeypatch.setattr("vdirsyncer.utils.generate_href", lambda x: x)

    uid = "test @ foo ät bar град сатану"
    collection = "test @ foo ät bar"
    s = self.storage_class(**get_storage_args(collection=collection))
    item = get_item(uid=uid)

    href, etag = s.upload(item)
    item2, etag2 = s.get(href)
    if etag is not None:
        assert etag2 == etag
    assert_item_equals(item2, item)

    ((_, etag3),) = s.list()
    assert etag2 == etag3

    # etesync uses UUIDs for collection names
    if self.storage_class.storage_name.startswith("etesync"):
        return

    assert collection in urlunquote(s.collection)
    if self.storage_class.storage_name.endswith("dav"):
        assert urlquote(uid, "/@:") in href
def save_post():
    """ Save the post """
    data = request.get_json()
    path = data['path']

    prefixes = current_app.config['WEB_EDITOR_PREFIXES']
    if prefixes == []:
        raise Exception("Web editing is not configured")

    if prefixes is not None:
        if not any([path.startswith(prefix) for prefix in prefixes]):
            return json.dumps({
                'msg': ("Your post path must begin with one of {}").format(prefixes),
                'success': False
            })

    # TODO better handling of overwriting
    kp = None
    if path in current_repo:
        kp = current_repo.post(path)
        if g.user.username not in kp.headers['authors'] and g.user.username not in current_repo.config.editors:
            return json.dumps({
                'msg': ("Post with path {} already exists and you are not an author!"
                        "\nPlease try a different path").format(path),
                'success': False
            })

    # create the knowledge post
    kp = kp or KnowledgePost(path=path)

    headers = {}
    headers['created_at'] = datetime.strptime(data['created_at'], '%Y-%m-%d').date()
    headers['updated_at'] = datetime.strptime(data['updated_at'], '%Y-%m-%d').date()
    headers['title'] = data['title']
    headers['path'] = data['path']
    # TODO: thumbnail header not working currently, as feed image set with kp
    # method not based on header
    headers['thumbnail'] = data.get('feed_image', '')
    headers['authors'] = [auth.strip() for auth in data['author']]
    headers['tldr'] = data['tldr']
    headers['tags'] = [tag.strip() for tag in data.get('tags', [])]
    if 'proxy' in data:
        headers['proxy'] = data['proxy']

    kp.write(urlunquote(data['markdown']), headers=headers)
    # add to repo
    current_repo.add(kp, update=True, message=headers['title'])  # THIS IS DANGEROUS

    update_index()
    return json.dumps({'path': path})
def all_objects(self, prefix=""):
    return [
        urlunquote(e["Key"])
        for p in self.s3.meta.client.get_paginator("list_objects_v2").paginate(
            Bucket=self.bucket, Prefix=prefix
        )
        if "Contents" in p
        for e in p["Contents"]
    ]
def save_post():
    """ Save the gitless post """
    post_id = request.args.get('post_id', None)
    data = request.get_json()

    post = (db_session.query(Post)
            .filter(Post.id == post_id)
            .first())
    new_post = False
    if not post:
        new_post = True
        path = "{}/{}.kp".format(data['project'],
                                 data['title'].encode('utf8').lower().replace(' ', '_'))
        if current_app.config.get('WEB_EDITOR_PREFIXES', None):
            # TODO: Include dropdown on webeditor to have user specify repo
            path = "{}/{}".format(current_app.config['WEB_EDITOR_PREFIXES'][0], path)
        post = (db_session.query(Post)
                .filter(Post.path == path)
                .first())
        if post:
            error_msg = "Post with project {} and title {} already exists!".format(
                data['project'], data['title'])
            json_str = json.dumps({'msg': error_msg, 'success': False})
            return json_str
        else:
            post = Post()
            post.path = path
    else:
        path = post.path

    # create the knowledge post
    kp = KnowledgePost(path=path)

    headers = {}
    headers['created_at'] = datetime.strptime(data['created_at'], '%Y-%m-%d')
    headers['updated_at'] = datetime.strptime(data['updated_at'], '%Y-%m-%d')
    headers['title'] = str(data['title'])
    headers['path'] = str(post.path)
    headers['project'] = str(data['project'])
    # TODO: thumbnail header not working currently, as feed image set with kp
    # method not based on header
    headers['thumbnail'] = str(data['feed_image'])
    headers['authors'] = [str(auth).strip() for auth in data['author']]
    headers['tldr'] = str(data['tldr'])
    headers['tags'] = [str(tag).strip() for tag in data['tags']]

    kp.write(urlunquote(str(data['markdown'])), headers=headers)

    # add to repo
    current_repo.add(kp, update=True)  # THIS IS DANGEROUS
    db_session.commit()

    # add to index
    post.update_metadata_from_kp(kp)
    if new_post:
        db_session.add(post)
    db_session.commit()

    return json.dumps({'post_id': str(post.id)})
def post(self, path, data=None, json=None, headers=DEFAULT_HEADERS):
    """Handles POST request, returns Response object.

    This interface supports the minimal dialect for:
    - table creation
    - row insertion

    :param path: resource path
    :param data: buffer or file-like content value (not supported)
    :param json: in-memory data object
    :param headers: request headers
    :return: response object
    """
    logger.debug('path: %s' % path)
    logger.debug('json: %s' % str(json))

    # handle table creation
    m = re.match(r'/schema/(?P<schema_name>[^/]+)/table', path)
    if m:
        try:
            schema_name = urlunquote(m.group('schema_name'))
            return SemiStructuredCatalog.Response(
                payload=self._create_table_on_disk(schema_name, json))
        except Exception as e:
            return SemiStructuredCatalog.Response(error=e)

    # handle row insertion
    m = re.match(
        r'/entity/(?P<schema_name>[^/]+):(?P<table_name>[^/?]+)([?]defaults=(?P<defaults>.+))?',
        path)
    if m:
        try:
            schema_name = urlunquote(m.group('schema_name'))
            table_name = urlunquote(m.group('table_name'))
            return SemiStructuredCatalog.Response(
                payload=self._write_rows_to_file(schema_name, table_name, json))
        except Exception as e:
            return SemiStructuredCatalog.Response(error=e)

    # all others, unhandled
    super(SemiStructuredCatalog, self).post(path, data=data, json=json, headers=headers)
def is_url_quoted(url):
    """Return whether URL looks to be already quoted
    """
    try:
        url_ = urlunquote(url)
        return url != url_
    except:
        # problem with unquoting -- then it must be it wasn't quoted (correctly)
        # MIH: ValueError?
        return False
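# Usage sketch (added), assuming urlunquote is urllib.parse.unquote;
# a URL counts as quoted when unquoting changes it:
from urllib.parse import unquote as urlunquote

assert is_url_quoted('http://example.com/f%20ile')
assert not is_url_quoted('http://example.com/file')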
def get_full_path(self, path_info):
    """
    Get local filename path from path_info.
    """
    path_info = utils.decode_path_info(path_info)
    path_info = posixpath.normpath(urlunquote(path_info))
    path = os.path.normpath(self.root + path_info)
    if (self.default_extension
            and not os.path.exists(path)
            and os.path.splitext(path)[1] == ''
            and os.path.isfile(path + self.default_extension)):
        path += self.default_extension
    return path
def _cover_from_html(self, hcover):
    from calibre.ebooks import render_html_svg_workaround
    with TemporaryDirectory('_html_cover') as tdir:
        writer = OEBWriter()
        writer(self.oeb, tdir)
        path = os.path.join(tdir, urlunquote(hcover.href))
        data = render_html_svg_workaround(path, self.logger)
        if not data:
            data = ''
    id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
    item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
    return item
def parse_params(
    instr: str,
    add_note: AddNoteMethodType,
    nostar: Union[List[str], bool] = None,
    delim: str = ";",
) -> Dict[str, str]:
    """
    Parse parameters into a dictionary.
    """
    param_dict = {}  # type: Dict[str, str]
    for param in split_string(instr, rfc7231.parameter, r"\s*%s\s*" % delim):
        try:
            key, val = param.split("=", 1)
        except ValueError:
            param_dict[param.lower()] = None
            continue
        k_norm = key.lower()  # TODO: warn on upper-case in param?
        if k_norm in param_dict:
            add_note(PARAM_REPEATS, param=k_norm)
        if val[0] == val[-1] == "'":
            add_note(
                PARAM_SINGLE_QUOTED,
                param=k_norm,
                param_val=val,
                param_val_unquoted=val[1:-1],
            )
        if key[-1] == "*":
            if nostar is True or (nostar and k_norm[:-1] in nostar):  # type: ignore
                add_note(PARAM_STAR_BAD, param=k_norm[:-1])
            else:
                if val[0] == '"' and val[-1] == '"':
                    add_note(PARAM_STAR_QUOTED, param=k_norm)
                    val = val[1:-1]
                try:
                    enc, lang, esc_v = val.split("'", 3)
                except ValueError:
                    add_note(PARAM_STAR_ERROR, param=k_norm)
                    continue
                enc = enc.lower()
                lang = lang.lower()
                if enc == "":
                    add_note(PARAM_STAR_NOCHARSET, param=k_norm)
                    continue
                elif enc not in ["utf-8"]:
                    add_note(PARAM_STAR_CHARSET, param=k_norm, enc=enc)
                    continue
                # TODO: catch unquoting errors, range of chars, charset
                unq_v = urlunquote(esc_v)
                param_dict[k_norm] = unq_v
        else:
            param_dict[k_norm] = unquote_string(val)
    return param_dict
def read_manifest(self):
    if '/manifest' not in self.entries:
        raise LitError('Lit file does not have a valid manifest')
    raw = self.get_file('/manifest')
    self.manifest = {}
    self.paths = {self.opf_path: None}
    while raw:
        slen, raw = ord(raw[0]), raw[1:]
        if slen == 0:
            break
        root, raw = raw[:slen].decode('utf8'), raw[slen:]
        if not raw:
            raise LitError('Truncated manifest')
        for state in ['spine', 'not spine', 'css', 'images']:
            num_files, raw = int32(raw), raw[4:]
            if num_files == 0:
                continue
            for i in range(num_files):
                if len(raw) < 5:
                    raise LitError('Truncated manifest')
                offset, raw = u32(raw), raw[4:]
                internal, raw = consume_sized_utf8_string(raw)
                original, raw = consume_sized_utf8_string(raw)
                # The path should be stored unquoted, but not always
                original = urlunquote(original)
                # Is this last one UTF-8 or ASCIIZ?
                mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
                self.manifest[internal] = ManifestItem(
                    original, internal, mime_type, offset, root, state)
    mlist = list(self.manifest.values())
    # Remove any common path elements
    if len(mlist) > 1:
        shared = mlist[0].path
        for item in mlist[1:]:
            path = item.path
            while shared and not path.startswith(shared):
                try:
                    shared = shared[:shared.rindex("/", 0, -2) + 1]
                except ValueError:
                    shared = None
            if not shared:
                break
        if shared:
            slen = len(shared)
            for item in mlist:
                item.path = item.path[slen:]
    # Fix any straggling absolute paths
    for item in mlist:
        if item.path[0] == '/':
            item.path = os.path.basename(item.path)
        self.paths[item.path] = item
def parse_custom_verification_url(url, verification_field_names):
    parsed_url = urlparse(url)
    num_of_fields = len(verification_field_names)
    url_path = parsed_url.path.rstrip('/')
    url_segments = url_path.rsplit('/', num_of_fields)
    if len(url_segments) != num_of_fields + 1:
        raise ValueError("Could not parse {url}".format(url=url))

    data_segments = url_segments[1:]
    url_path = url_segments[0] + '/'
    verification_data = {
        name: urlunquote(value)
        for name, value in zip(verification_field_names, data_segments)}
    return url_path, verification_data
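# Usage sketch (added), assuming urlparse/urlunquote come from
# urllib.parse; the trailing path segments map onto the given field
# names and are percent-decoded:
from urllib.parse import urlparse, unquote as urlunquote

path, data = parse_custom_verification_url(
    'https://host/verify/alice/abc%20123/', ['user_id', 'token'])
assert path == '/verify/'
assert data == {'user_id': 'alice', 'token': 'abc 123'}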
def updateTree(self, path="/"):
    """ Updates the Local dictionary of directories and files """
    self.log.debug("updating Local DataTrees %s" % path)
    DATA = "<?xml version='1.0' encoding='UTF-8' ?><D:propfind xmlns:D='DAV:'><D:prop><D:allprop/></D:prop></D:propfind>"
    resp = self.http(self.url + "/" + path, 'PROPFIND')
    if resp.status_code != 207:
        self.good = False
        return
    self.good = True
    obj = ET.XML(resp.text)
    if obj.tag != "{DAV:}multistatus":
        return
    for i in obj.getchildren():
        if i.tag == "{DAV:}response":
            newEntry = dict()
            for d in i.getchildren():
                if d.tag == "{DAV:}href":
                    name = urlunquote(d.text[len(self.base) + 1:])
                    newEntry['name'] = name
                elif d.tag == "{DAV:}propstat":
                    X = d.find("{DAV:}prop")
                    if X is not None:
                        ID = X.find("{http://owncloud.org/ns}id")
                        ETAG = X.find("{DAV:}etag")
                        lastMod = X.find("{DAV:}getlastmodified")
                        length = X.find("{DAV:}getcontentlength")
                        if lastMod is not None:
                            try:
                                fmt = "%a, %d %b %Y %H:%M:%S GMT"
                                T = time.strptime(lastMod.text, fmt)
                                newEntry['lastMod'] = int((time.mktime(T) - time.altzone) * 1000)
                            except Exception as e:
                                self.log.error("Problem converting time stamp: %s, %s"
                                               % (newEntry['name'], lastMod.text))
                                newEntry['lastMod'] = 0
                        if length is not None:
                            newEntry['size'] = length.text
                            newEntry['type'] = "FILE"
                            self.FILES[newEntry['name']] = newEntry
                        else:
                            newEntry['type'] = "DIR"
                            self.DIRS[newEntry['name']] = newEntry
            if newEntry['type'] == "DIR" and newEntry['name'] != path:
                self.updateTree(newEntry['name'])
    if "/" in self.FILES:
        del(self.FILES["/"])
    if "/" in self.DIRS:
        del(self.DIRS["/"])
def escape(self, strict=False):
    '''Make sure that the path is correctly escaped'''
    if strict:
        self._path = self.percent_encode(self._path, URL.PATH)
        self._query = self.percent_encode(self._query, URL.QUERY)
        self._params = self.percent_encode(self._params, URL.QUERY)
        if self._userinfo:
            self._userinfo = self.percent_encode(self._userinfo, URL.USERINFO)
        return self
    else:
        self._path = urlquote(urlunquote(self._path), safe=URL.PATH)
        # Safe characters taken from:
        # http://tools.ietf.org/html/rfc3986#page-50
        self._query = urlquote(urlunquote(self._query), safe=URL.QUERY)
        # The safe characters for URL parameters seemed a little more vague.
        # They are interpreted here as *pchar despite this page, since the
        # updated RFC seems to offer no replacement
        # http://tools.ietf.org/html/rfc3986#page-54
        self._params = urlquote(urlunquote(self._params), safe=URL.QUERY)
        if self._userinfo:
            self._userinfo = urlquote(urlunquote(self._userinfo), safe=URL.USERINFO)
        return self
def get_toc(self):
    self.stream.seek(24)
    toc_offset = self.read_i32()
    self.stream.seek(toc_offset)
    pages = self.read_i32()
    toc = RBToc()
    for i in range(pages):
        name = urlunquote(self.stream.read(32).strip('\x00'))
        size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
        toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))
    return toc
def pathList(self, unquote=False, copy=True):
    """
    Split this URL's path into its components.

    @param unquote: whether to remove %-encoding from the returned strings.

    @param copy: (ignored, do not use)

    @return: The components of C{self.path}
    @rtype: L{list} of L{bytes}
    """
    segments = self._url.path
    mapper = lambda x: x.encode("ascii")
    if unquote:
        mapper = lambda x, m=mapper: m(urlunquote(x))
    return [b""] + [mapper(segment) for segment in segments]
def do_login(pid):
    pid = int(pid)
    cfg = autonom.get_provider(pid)
    username = request.forms.get('username')
    password = request.forms.get('password')
    url = urlunquote(request.forms.get('url'))
    user, grps = check_login(username, password, cfg)
    if user:
        # successful...
        return autonom.new_session(user, grps, url)
    templ = get_templ(pid)
    return template(templ, url=url, msg="Login Failed")
def save_post():
    """ Save the post """
    data = request.get_json()
    path = data['path']

    prefixes = current_app.config['WEB_EDITOR_PREFIXES']
    if prefixes == []:
        raise Exception("Web editing is not configured")

    if prefixes is not None:
        if not any([path.startswith(prefix) for prefix in prefixes]):
            return json.dumps({'msg': (u"Your post path must begin with one of {}").format(prefixes),
                               'success': False})

    # TODO better handling of overwriting
    kp = None
    if path in current_repo:
        kp = current_repo.post(path)
        if current_user.identifier not in kp.headers['authors'] and current_user.identifier not in current_repo.config.editors:
            return json.dumps({'msg': (u"Post with path {} already exists and you are not an author!"
                                       "\nPlease try a different path").format(path),
                               'success': False})

    # create the knowledge post
    kp = kp or KnowledgePost(path=path)

    headers = {}
    headers['created_at'] = datetime.strptime(data['created_at'], '%Y-%m-%d').date()
    headers['updated_at'] = datetime.strptime(data['updated_at'], '%Y-%m-%d').date()
    headers['title'] = data['title']
    headers['path'] = data['path']
    # TODO: thumbnail header not working currently, as feed image set with kp
    # method not based on header
    headers['thumbnail'] = data.get('feed_image', '')
    headers['authors'] = [auth.strip() for auth in data['author']]
    headers['tldr'] = data['tldr']
    headers['tags'] = [tag.strip() for tag in data.get('tags', [])]
    if 'proxy' in data:
        headers['proxy'] = data['proxy']

    kp.write(urlunquote(data['markdown']), headers=headers)
    # add to repo
    current_repo.add(kp, update=True, message=headers['title'])  # THIS IS DANGEROUS

    update_index()
    return json.dumps({'path': path})
def svnUriCanonicalize(uri):
    collapse = re.compile(r'([^/]+/\.\./?|/\./|//|/\.$|/\.\.$|^/\.\.)')
    server_authority = re.compile(r'^(?:([^@]+)@)?([^:]+)(?::(.+))?$')
    default_port = {'http': '80', 'https': '443', 'svn': '3690'}
    relative_schemes = ['http', 'https', 'svn']

    def quote(uri):
        return urlquote(uri, "!$&'()*+,-./:=@_~", encoding="latin-1")

    if not uri or uri == '/':
        return uri
    (scheme, authority, path, parameters, query, fragment) = urlparse(uri)
    scheme = scheme.lower()
    if authority:
        mo = server_authority.match(authority)
        if not mo:
            return uri  # give up
        userinfo, host, port = mo.groups()
        if host[-1] == '.':
            host = host[:-1]
        authority = host.lower()
        if userinfo:
            authority = "{}@{}".format(userinfo, authority)
        if port and port != default_port.get(scheme, None):
            authority = "{}:{}".format(authority, port)

    if scheme in relative_schemes:
        last_path = path
        while True:
            path = collapse.sub('/', path, 1)
            if last_path == path:
                break
            last_path = path

    path = quote(urlunquote(path))
    canonical_uri = urlunparse(
        (scheme, authority, path, parameters, query, fragment))
    if canonical_uri == '/':
        return canonical_uri
    elif canonical_uri[-1] == '/' and canonical_uri[-2] != '/':
        return canonical_uri[:-1]
    return canonical_uri
def svnUriCanonicalize(uri):
    collapse = re.compile(r'([^/]+/\.\./?|/\./|//|/\.$|/\.\.$|^/\.\.)')
    server_authority = re.compile(r'^(?:([^@]+)@)?([^:]+)(?::(.+))?$')
    default_port = {'http': '80', 'https': '443', 'svn': '3690'}
    relative_schemes = ['http', 'https', 'svn']

    def quote(uri):
        return urlquote(uri, "!$&'()*+,-./:=@_~", encoding="latin-1")

    if not uri or uri == '/':
        return uri
    (scheme, authority, path, parameters, query, fragment) = urlparse(uri)
    scheme = scheme.lower()
    if authority:
        mo = server_authority.match(authority)
        if not mo:
            return uri  # give up
        userinfo, host, port = mo.groups()
        if host[-1] == '.':
            host = host[:-1]
        authority = host.lower()
        if userinfo:
            authority = "%s@%s" % (userinfo, authority)
        if port and port != default_port.get(scheme, None):
            authority = "%s:%s" % (authority, port)

    if scheme in relative_schemes:
        last_path = path
        while True:
            path = collapse.sub('/', path, 1)
            if last_path == path:
                break
            last_path = path

    path = quote(urlunquote(path))
    canonical_uri = urlunparse(
        (scheme, authority, path, parameters, query, fragment))
    if canonical_uri == '/':
        return canonical_uri
    elif canonical_uri[-1] == '/' and canonical_uri[-2] != '/':
        return canonical_uri[:-1]
    return canonical_uri
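# Usage sketch (added) for either svnUriCanonicalize() variant above,
# assuming the url* aliases come from urllib.parse: scheme and host are
# lowercased, default ports and dot-segments are removed, and a
# trailing slash is stripped.
import re
from urllib.parse import (urlparse, urlunparse,
                          quote as urlquote, unquote as urlunquote)

assert svnUriCanonicalize('HTTP://Example.COM:80/a/b/../c/') == 'http://example.com/a/c'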
def read(self, name):
    entry = self._litfile.paths[urlunquote(name)] if name else None
    if entry is None:
        content = OPF_DECL + self._read_meta()
    elif 'spine' in entry.state:
        internal = '/'.join(('/data', entry.internal, 'content'))
        raw = self._litfile.get_file(internal)
        manifest = self._litfile.manifest
        atoms = self._litfile.get_atoms(entry)
        unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
        content = HTML_DECL + str(unbin)
        tags = ('personname', 'place', 'city', 'country-region')
        pat = r'(?i)</{0,1}st1:(%s)>' % ('|'.join(tags))
        content = re.sub(pat, '', content)
        content = re.sub(r'<(/{0,1})form>', r'<\1div>', content)
    else:
        internal = '/'.join(('/data', entry.internal))
        content = self._litfile.get_file(internal)
    return content
def parse_content_disposition(value):
    m = re.match("^filename[*]=UTF-8''(?P<name>[-_.~A-Za-z0-9%]+)$", value)
    if not m:
        raise ValueError('Cannot parse content-disposition "%s".' % value)

    n = m.groupdict()['name']

    try:
        n = urlunquote(str(n))
    except Exception as e:
        raise ValueError('Invalid URL encoding of content-disposition filename component. %s.' % e)

    try:
        if sys.version_info < (3,):
            n = n.decode('utf8')
    except Exception as e:
        raise ValueError('Invalid UTF-8 encoding of content-disposition filename component. %s.' % e)

    return n
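# Usage sketch (added), assuming urlunquote is urllib.parse.unquote;
# only the RFC 5987 "filename*=UTF-8''..." form is accepted:
import re
import sys
from urllib.parse import unquote as urlunquote

assert parse_content_disposition("filename*=UTF-8''na%C3%AFve%20file.txt") == 'naïve file.txt'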
def __init__(self, path=None):
    self.headers = {}
    self.input_headers = {}
    self.prepath = []

    x = path.split(b'?', 1)
    if len(x) == 1:
        self.path = path
        self.args = {}
    else:
        path, argstring = x
        self.path = path
        self.args = parse_qs(argstring, 1)
    self.uri = self.path
    self.postpath = []
    for p in path[1:].split(b'/'):
        path = urlunquote(bytes2unicode(p))
        self.postpath.append(unicode2bytes(path))

    self.deferred = defer.Deferred()
def parse_params(instr: str, add_note: AddNoteMethodType,
                 nostar: Union[List[str], bool] = None,
                 delim: str = ";") -> Dict[str, str]:
    """
    Parse parameters into a dictionary.
    """
    param_dict = {}  # type: Dict[str, str]
    for param in split_string(instr, rfc7231.parameter, r"\s*%s\s*" % delim):
        try:
            key, val = param.split("=", 1)
        except ValueError:
            param_dict[param.lower()] = None
            continue
        k_norm = key.lower()  # TODO: warn on upper-case in param?
        if k_norm in param_dict:
            add_note(PARAM_REPEATS, param=k_norm)
        if val[0] == val[-1] == "'":
            add_note(PARAM_SINGLE_QUOTED, param=k_norm, param_val=val,
                     param_val_unquoted=val[1:-1])
        if key[-1] == '*':
            if nostar is True or (nostar and k_norm[:-1] in nostar):  # type: ignore
                add_note(PARAM_STAR_BAD, param=k_norm[:-1])
            else:
                if val[0] == '"' and val[-1] == '"':
                    add_note(PARAM_STAR_QUOTED, param=k_norm)
                    val = val[1:-1]
                try:
                    enc, lang, esc_v = val.split("'", 3)
                except ValueError:
                    add_note(PARAM_STAR_ERROR, param=k_norm)
                    continue
                enc = enc.lower()
                lang = lang.lower()
                if enc == '':
                    add_note(PARAM_STAR_NOCHARSET, param=k_norm)
                    continue
                elif enc not in ['utf-8']:
                    add_note(PARAM_STAR_CHARSET, param=k_norm, enc=enc)
                    continue
                # TODO: catch unquoting errors, range of chars, charset
                unq_v = urlunquote(esc_v)
                param_dict[k_norm] = unq_v
        else:
            param_dict[k_norm] = unquote_string(val)
    return param_dict
def filename_from_url(url, ext):
    """Extract a valid filename from a URL

    :Args:
        - url (str): URL to extract the filename from.
        - ext (str): An additional file extension if necessary.
          May be ``None``.

    :Returns:
        - A valid filename.
    """
    alt = urlsplit(url)
    url_file = alt.path.rpartition('/')[2]
    filename = urlunquote(url_file)
    LOG.debug("Filename {fn} with extension {ex} from URL "
              "{ur}".format(fn=filename, ex=ext, ur=url))
    return (filename + ext) if ext else filename
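# Usage sketch (added), assuming urlsplit/urlunquote come from
# urllib.parse and LOG is a stdlib logging.Logger:
import logging
from urllib.parse import urlsplit, unquote as urlunquote

LOG = logging.getLogger(__name__)
assert filename_from_url('http://host/dl/My%20Doc', '.pdf') == 'My Doc.pdf'
assert filename_from_url('http://host/dl/My%20Doc.pdf', None) == 'My Doc.pdf'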
def parse_as_folder_reference(dep):
    """ See if a dependency reference refers to a folder path.
    If it does, return the folder path (which parses and
    resolves file:// urls in the process).
    If it doesn't, return None.
    """
    # Special case: pep508 urls
    if dep.find("@") > 0 and (
            (dep.find("@") < dep.find("/") or "/" not in dep) and
            (dep.find("@") < dep.find(":") or ":" not in dep)):
        # This should be a 'pkgname @ https://...' style path, or
        # 'pkgname @ /local/file/path'.
        return parse_as_folder_reference(dep.partition("@")[2].lstrip())
    # Check if this is either not an url, or a file URL:
    if dep.startswith(("/", "file://")) or (
            dep.find("/") > 0 and dep.find("://") < 0):
        if dep.startswith("file://"):
            dep = urlunquote(urlparse(dep).path)
        return dep
    return None
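# Usage sketch (added), assuming urlparse/urlunquote come from
# urllib.parse; plain paths pass through, file:// URLs are resolved,
# pep508 'name @ url' references are unwrapped, and anything else
# returns None:
from urllib.parse import urlparse, unquote as urlunquote

assert parse_as_folder_reference('/local/path') == '/local/path'
assert parse_as_folder_reference('file:///my%20dir') == '/my dir'
assert parse_as_folder_reference('pkg @ file:///my%20dir') == '/my dir'
assert parse_as_folder_reference('requests>=2.0') is None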
def _path_encode(x):
    return wsgi_encoding_dance(
        urlunquote(x, self.charset), self.charset
    )
def go_split(string):
    return [urlunquote(part) for part in string.split(':')]
def process_ck_web_request(i):
    """
    Input:  {
              http - Python http object
            }

    Output: { None }
    """

    # http object
    http = i['http']

    # Parse GET variables and path
    xget = {}
    xpath = {'first': '', 'rest': '', 'query': ''}  # May be used in the future

    xt = 'json'

    # Check GET variables
    if http.path != '':
        http.send_response(200)

        a = urlparse.urlparse(http.path)
        xp = a.path
        xr = ''

        if xp.startswith('/'):
            xp = xp[1:]

        u = xp.find('/')
        if u >= 0:
            xr = xp[u + 1:]
            xp = xp[:u]

        xt = xp

        xpath['first'] = xp
        xpath['rest'] = xr
        xpath['query'] = a.query

        b = urlparse.parse_qs(a.query, keep_blank_values=True)

        xget = {}
        for k in b:
            # xget[k]=b[k][0]
            xget[k] = urlunquote(b[k][0])
            if sys.version_info[0] < 3:
                xget[k] = xget[k].decode('utf8')

    # Check POST
    xpost = {}
    xpost1 = {}

    try:
        headers = http.headers
        content_type = headers.get('content-type')
        ctype = ''
        if content_type is not None:
            ctype, pdict = cgi.parse_header(content_type)
            # Python3 cgi.parse_multipart expects boundary to be bytes, not str.
            if sys.version_info[0] < 3 and 'boundary' in pdict:
                pdict['boundary'] = pdict['boundary'].encode()

        if ctype == 'multipart/form-data':
            if sys.version_info[0] < 3:
                xpost1 = cgi.parse_multipart(http.rfile, pdict)
            else:
                xxpost1 = cgi.FieldStorage(fp=http.rfile, headers=headers,
                                           environ={'REQUEST_METHOD': 'POST'})
                for k in xxpost1.keys():
                    xpost1[k] = [xxpost1[k].value]
        elif ctype == 'application/x-www-form-urlencoded':
            length = int(http.headers.get('content-length'))
            s = http.rfile.read(length)
            if sys.version_info[0] > 2:
                s = s.decode('utf8')
            xpost1 = cgi.parse_qs(s, keep_blank_values=1)
    except Exception as e:
        bin = b'internal CK web service error [7101] (' + format(e).encode('utf8') + b')'
        web_err({'http': http, 'type': xt, 'bin': bin})
        ck.out(ck.cfg['error'] + bin.decode('utf8'))
        return

    # Post processing
    for k in xpost1:
        v = xpost1[k]
        if k.endswith('[]'):
            k1 = k[:-2]
            xpost[k1] = []
            for l in v:
                xpost[k1].append(urlunquote(l))
        else:
            if k != 'file_content':
                xpost[k] = urlunquote(v[0])
            else:
                xpost[k] = v[0]

        if k == 'file_content':
            fcrt = xpost1.get('file_content_record_to_tmp', '')
            if (type(fcrt) == list and len(fcrt) > 0 and fcrt[0] == 'yes') or fcrt == 'yes':
                # suffix is important - CK will delete such file!
                fd, fn = tempfile.mkstemp(suffix='.tmp', prefix='ck-')
                os.close(fd)

                f = open(fn, 'wb')
                f.write(xpost[k])
                f.close()

                xpost[k + '_uploaded'] = fn
                del(xpost[k])
                k += '_uploaded'
            else:
                import base64
                xpost[k + '_base64'] = base64.urlsafe_b64encode(xpost[k]).decode('utf8')
                del(xpost[k])
                k += '_base64'

        if sys.version_info[0] < 3:
            xpost[k] = xpost[k].decode('utf8')

    # Prepare input and check if CK json present
    ii = xget
    ii.update(xpost)

    cj = ii.get('ck_json', '').strip()
    if cj != '':
        r = ck.convert_json_str_to_dict({'str': cj, 'skip_quote_replacement': 'yes'})
        if r['return'] > 0:
            bin = b'internal CK web service error [7102] (' + r['error'].encode('utf8') + b')'
            web_err({'http': http, 'type': xt, 'bin': bin})
            ck.out(ck.cfg['error'] + bin.decode('utf8'))
            return

        del(ii['ck_json'])
        ii.update(r['dict'])

    # Misc parameters
    dc = ii.get('detach_console', '')
    act = ii.get('action', '')

    # Check output type
    if ii.get('out', '') != '':
        xt = ii['out']
    if xt == '':
        xt = 'web'

    if xt != 'json' and xt != 'con' and xt != 'web':
        web_out({'http': http, 'type': 'web',
                 'bin': b'Unknown CK request (' + xt.encode('utf8') + b')!'})
        return

    # Prepare temporary output file
    fd, fn = tempfile.mkstemp(prefix='ck-')
    os.close(fd)
    os.remove(fn)

    # Check output
    if dc == 'yes':
        if ck.cfg.get('forbid_detached_console', '') == 'yes':
            web_out({'http': http, 'type': 'web',
                     'bin': b'Detached console is forbidden!'})
            return
    else:
        ii['out_file'] = fn
        ii['web'] = 'yes'
        if xt == 'json' or xt == 'web':
            ii['out'] = 'json_file'
        # else output to console (for remote access for example)

    ii['con_encoding'] = 'utf8'

    # Execute command *********************************************************
    if act == '':
        if cfg.get('if_web_action_not_defined', '') != '' and cfg.get('if_web_module_not_defined', '') != '':
            ii['module_uoa'] = cfg['if_web_module_not_defined']
            ii['action'] = cfg['if_web_action_not_defined']

    r = call_ck(ii)

    # Process output
    if r['return'] > 0:
        if os.path.isfile(fn):
            os.remove(fn)

        bout = r['error']
        try:
            bout = bout.encode('utf-8')
        except Exception as e:
            pass

        web_err({'http': http, 'type': xt, 'bin': bout})
        return

    # If output to console or detached console
    if xt == 'con' or dc == 'yes':
        if os.path.isfile(fn):
            os.remove(fn)

        bout = r.get('std', '').encode('utf8')
        web_out({'http': http, 'type': xt, 'bin': bout})
        return

    # If json or web, try to load output file
    if not os.path.isfile(fn):
        web_err({'http': http, 'type': xt,
                 'bin': b'Output json file was not created, see output (' + r['std'].encode('utf8') + b')!'})
        return

    r = ck.load_text_file({'text_file': fn, 'keep_as_bin': 'yes'})
    if r['return'] > 0:
        bout = r['error']
        try:
            bout = bout.encode('utf-8')
        except Exception as e:
            pass

        web_err({'http': http, 'type': xt, 'bin': bout})
        return

    bin = r['bin']

    if os.path.isfile(fn):
        os.remove(fn)

    # Process JSON output from file
    fx = ''

    if sys.version_info[0] > 2:
        bin = bin.decode('utf-8')

    ru = ck.convert_json_str_to_dict({'str': bin, 'skip_quote_replacement': 'yes'})
    if ru['return'] > 0:
        bout = ru['error']
        try:
            bout = bout.encode('utf-8')
        except Exception as e:
            pass

        web_err({'http': http, 'type': xt, 'bin': bout})
        return

    rr = ru['dict']
    if rr['return'] > 0:
        bout = rr['error']
        try:
            bout = bout.encode('utf-8')
        except Exception as e:
            pass

        web_err({'http': http, 'type': xt, 'bin': bout})
        return

    # Check if file was returned
    fr = False
    if 'file_content_base64' in rr and rr.get('filename', '') != '':
        fr = True

    # Check if download
    if (xt == 'web' and fr) or (act == 'pull' and xt != 'json'):
        import base64

        x = rr.get('file_content_base64', '')

        fx = rr.get('filename', '')
        if fx == '':
            fx = ck.cfg['default_archive_name']

        # Fixing Python bug
        if sys.version_info[0] == 3 and sys.version_info[1] < 3:
            x = x.encode('utf-8')
        else:
            x = str(x)

        # convert from unicode to str since base64 works on strings;
        # should be safe in Python 2.x and 3.x
        bin = base64.urlsafe_b64decode(x)

        # Process extension
        fn1, fne = os.path.splitext(fx)
        if fne.startswith('.'):
            fne = fne[1:]
        if fne != '':
            xt = fne
        else:
            xt = 'unknown'
    else:
        # Check and output html
        if rr.get('html', '') != '':
            bin = rr['html'].encode('utf-8')
        else:
            if sys.version_info[0] > 2:
                # Unknown output
                bin = bin.encode('utf-8')

    web_out({'http': http, 'type': xt, 'bin': bin, 'filename': fx})

    return {'return': 0}
def loads(value):
    return urlunquote(value.decode('ascii'))
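# Usage sketch (added), assuming urlunquote is urllib.parse.unquote and
# the input is ASCII, percent-encoded bytes:
from urllib.parse import unquote as urlunquote

assert loads(b'hello%20world') == 'hello world'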