def frankiz_check(request):
    import logging
    logger = logging.getLogger(__name__)
    params = request.query_params
    if "timestamp" not in params or "response" not in params or "hash" not in params:
        logger.error('KEYS')
        raise NotAuthenticated()
    response = parse.unquote_to_bytes(params.get("response"))
    ts = parse.unquote_to_bytes(params.get("timestamp"))
    h = parse.unquote_to_bytes(params.get("hash"))
    # Reject timestamps more than three hours away from the server clock.
    if abs(int(time.time()) - int(ts)) > 3600 * 3:
        logger.error('TS')
        raise NotAuthenticated()
    # The hash covers timestamp + shared key + response payload.
    if hashlib.md5(ts + FKZ_KEY + response).hexdigest() != h.decode():
        logger.error('HASH')
        raise NotAuthenticated()
    data = json.loads(response.decode())  # validates the payload; value unused here
    return request
def frankiz_auth_check(self, request):
    import logging
    logger = logging.getLogger(__name__)
    student = None
    response_data = {'valid': True, 'student': student}
    RETURN_PAGE = ('http://' + request.get_host() + '/vaneau/').encode()
    logger.error(RETURN_PAGE)
    params = request.query_params
    if "timestamp" not in params or "response" not in params or "hash" not in params:
        logger.error('KEYS')
        response_data["valid"] = False
        # Bail out early: unquote_to_bytes(None) would raise below.
        return Response(response_data, 200)
    response = parse.unquote_to_bytes(params.get("response"))
    ts = parse.unquote_to_bytes(params.get("timestamp"))
    h = parse.unquote_to_bytes(params.get("hash"))
    if abs(int(time.time()) - int(ts)) > 3600 * 3 or \
            abs(int(ts) + 3 * 3600 - int(time.time())) < 30 * 60:
        logger.error('TS')
        response_data["valid"] = False
    if hashlib.md5(ts + FKZ_KEY + response).hexdigest() != h.decode():
        logger.error('HASH')
        response_data["valid"] = False
    if response_data["valid"]:
        data = json.loads(response.decode())
        try:
            student = Student.objects.get(hruid=data["hruid"])
        except Student.DoesNotExist:
            student = Student.objects.create(
                hruid=data["hruid"],
                lastname=data["lastname"],
                firstname=data["firstname"],
                promo=data["promo"],
            )
        finally:
            response_data["student"] = StudentSerializer(student).data
    return Response(response_data, 200)
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False):
    """Modify `urllib.parse.parse_qsl` to handle percent-encoded characters
    properly.

    `parse_qsl` replaces percent-encoded characters with the replacement
    character (U+FFFD) (if errors = "replace") or drops them (if
    errors = "ignore"); see
    https://docs.python.org/3/howto/unicode.html#the-string-type.
    Instead we want to keep the raw bytes, and percent-encode them directly
    later when we need to.

    Code from
    https://github.com/python/cpython/blob/73c4708630f99b94c35476529748629fff1fc63e/Lib/urllib/parse.py#L658
    with `unquote` replaced with `unquote_to_bytes`.
    """
    qs, _coerce_result = _coerce_args(qs)
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = nv[0].replace('+', ' ')
            name = unquote_to_bytes(name)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = unquote_to_bytes(value)
            value = _coerce_result(value)
            r.append((name, value))
    return r
def get_clonebundle_url(repo):
    bundles = repo._call(b'clonebundles')

    supported_bundles = (b'v1', b'v2')
    supported_compressions = tuple(
        k for k, v in (
            (b'none', b'UN'),
            (b'gzip', b'GZ'),
            (b'bzip2', b'BZ'),
            (b'zstd', b'ZS'),
        ) if HgRepoHelper.supports((b'compression', v))
    )

    has_sni = getattr(ssl, 'HAS_SNI', False)

    logger = logging.getLogger('clonebundle')

    for line in bundles.splitlines():
        attrs = line.split()
        if not attrs:
            continue
        url = attrs.pop(0)
        logger.debug(url)
        attrs = {
            unquote_to_bytes(k): unquote_to_bytes(v)
            for k, _, v in (a.partition(b'=') for a in attrs)
        }
        logger.debug(attrs)
        if b'REQUIRESNI' in attrs and not has_sni:
            logger.debug('Skip because of REQUIRESNI, but SNI unsupported')
            continue

        spec = attrs.get(b'BUNDLESPEC')
        if not spec:
            logger.debug('Skip because missing BUNDLESPEC')
            continue

        typ, _, params = spec.partition(b';')
        compression, _, version = typ.partition(b'-')
        if compression not in supported_compressions:
            logger.debug('Skip because unsupported compression (%s)',
                         compression)
            continue
        if version not in supported_bundles:
            logger.debug('Skip because unsupported bundle type (%s)', version)
            continue

        params_dict = {}
        for p in params.split(b':'):
            k, _, v = p.partition(b'=')
            params_dict[k] = v
        # The keys in params_dict are bytes, so the membership test must be
        # done with bytes as well ('stream' as str would never match).
        if b'stream' in params_dict:
            logger.debug('Skip because stream bundles are not supported')
            continue

        return url
def user_page():
    auth_cookie = request.cookies.get("auth")
    sig_cookie = request.cookies.get("sig")
    # Check for missing cookies before unquoting: unquote_to_bytes(None) raises.
    if auth_cookie is None or sig_cookie is None:
        return redirect(url_for("hello"))
    auth_cookie = unquote_to_bytes(auth_cookie)
    sig_cookie = unquote_to_bytes(sig_cookie)
    if sig_cookie != make_signature(auth_cookie):
        resp = make_response(redirect(url_for("hello")))
        resp.delete_cookie("auth")
        resp.delete_cookie("sig")
        return resp
    return "<h1>Hola guest, only admin can reach secret page</h1><br><!--maybe source page helps you-->"
def parse_qsl_to_bytes(qs, keep_blank_values=False, strict_parsing=False):
    """Parse a query given as a string argument.

    Data are returned as a list of name, value pairs as bytes.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors.  If
        false (the default), errors are silently ignored.  If true,
        errors raise a ValueError exception.
    """
    # This code is the same as Python3's parse_qsl()
    # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
    # except for the unquote(s, encoding, errors) calls replaced
    # with unquote_to_bytes(s)
    qs, _coerce_result = _coerce_args(qs)
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = nv[0].replace('+', ' ')
            name = unquote_to_bytes(name)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = unquote_to_bytes(value)
            value = _coerce_result(value)
            r.append((name, value))
    return r
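
# --- Illustration (not from any project above): why the unquote_to_bytes()
# substitution in the parse_qsl variants matters. Plain unquote() decodes
# percent-escapes as UTF-8 and replaces undecodable bytes with U+FFFD, so a
# latin-1 escape such as %E9 is lost; unquote_to_bytes() keeps the raw byte.
from urllib.parse import unquote, unquote_to_bytes

assert unquote_to_bytes('tag=caf%E9') == b'tag=caf\xe9'  # raw byte preserved
assert unquote('tag=caf%E9') == 'tag=caf\ufffd'          # byte replaced, data lost
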
def unravel_message(url, cache):
    """Follows the riddle until the end -- as in mission 04 -- but this time
    looking at the first cookie. Then returns the message decompressed."""
    curr = 12345
    cookie_jar = CookieJar()
    url_opener = build_opener(HTTPCookieProcessor(cookie_jar))
    msg = cache.get("msg", "")
    if not msg:
        while True:
            try:
                resp = url_opener.open(url.format(curr), timeout=3)
                riddle = resp.read().decode()
                cookie = list(cookie_jar)[0]
                msg += cookie.value
            except KeyboardInterrupt:
                sys.exit(0)
            except Exception as e:
                print(f"\nBang! {e} ({curr})")
                sys.exit(1)
            try:
                next_ = int(riddle.split(" ")[-1])
            except ValueError:
                break
            curr = next_
        cache["msg"] = msg
    return bz2.decompress(unquote_to_bytes(msg.replace("+", " "))).decode()
def parse_querystring(query_string):
    """
    Parse a raw query string.

    Args:
        query_string: raw query string

    Return a list of 2-tuples (key=value).

    Examples:
        >>> parsed = QueryString.parse_querystring('field1=foo&field2=bar')
        >>> expected = [('field1', 'foo'), ('field2', 'bar')]
        >>> parsed == expected
        True
    """
    result = []
    # make sure the string neither begins nor ends with a &
    # the same rule applies to query parameters split by a =
    # ie filter out &field&, =field, field=, =field=value, etc
    for param in query_string.strip('&').split('&'):
        param_split = param.strip('=').split('=', 1)  # max_splits=1
        result.append(tuple([
            unquote_plus(unquote_to_bytes(x.encode('utf-8')).decode('utf-8'))  # 2/3 hack
            for x in (param_split + [''])[:2]  # make sure the param value is present
        ]))
    return result
def display_url(url):
    """Display a URL like the browser URL bar would.

    Note: returns a Unicode object, not a valid URL.
    """
    url = force_bytes(url, errors='replace')
    return unquote_to_bytes(url).decode('utf-8', errors='replace')
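
# A stdlib-only sketch of the same idea (force_bytes above is assumed to be
# Django's django.utils.encoding.force_bytes): undecodable bytes degrade to
# U+FFFD instead of raising, which mirrors what a browser URL bar displays.
from urllib.parse import unquote_to_bytes

assert unquote_to_bytes(b'/caf%C3%A9?q=%A3').decode('utf-8', errors='replace') \
    == '/caf\xe9?q=\ufffd'
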
def open_data_url(url):
    """Decode URLs with the 'data' scheme. urllib can handle them
    in Python 2, but that is broken in Python 3.

    Inspired from Python 2.7.2's urllib.py.
    """
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    try:
        header, data = url.split(",", 1)
    except ValueError:
        raise IOError("bad data URL")
    header = header[5:]  # len("data:") == 5
    if header:
        semi = header.rfind(";")
        if semi >= 0 and "=" not in header[semi:]:
            encoding = header[semi + 1:]
        else:
            encoding = ""
    else:
        encoding = ""
    data = unquote_to_bytes(data)
    if encoding == "base64":
        # Re-pad to a multiple of 4; -len % 4 is 0 when no padding is needed.
        missing_padding = -len(data) % 4
        if missing_padding:
            data += b"=" * missing_padding
        # base64.decodestring was removed in Python 3.9; decodebytes is the
        # modern name for the same function.
        return base64.decodebytes(data)
    return data
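
# Usage sketch for open_data_url() above (assuming it is importable as written):
assert open_data_url('data:,hi%20there') == b'hi there'
assert open_data_url('data:text/plain;base64,aGVsbG8=') == b'hello'
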
def get_message():
    # Start with 12345, like in problem 4
    busynothing = "12345"
    cookie_msg = ""
    for _ in range(400):
        url = f"http://www.pythonchallenge.com/pc/def/linkedlist.php?busynothing={busynothing}"
        request = urlopen(url)

        # Read the next byte in the cookie
        headers = request.getheaders()
        cookie = get_header_cookie(headers)
        cookie_msg += search("info=([^;]+);", cookie).group(1)

        # The next busynothing is in the response
        data = request.read().decode("utf-8")
        match = search(r"the next busynothing is (\d+)", data)  # renamed from 're' to avoid shadowing the module
        if match is None:
            break
        busynothing = match.group(1)

    # Convert to bytes and decompress. The spaces were URL encoded though
    res = unquote_to_bytes(cookie_msg.replace("+", " "))
    decoded = bz2.decompress(res).decode()
    print(f"Decoded from cookies: {decoded}")
    return search('"(.+)"', decoded).group(1)
def verify_handshake(token, signature, action):
    """
    Decodes a handshake token (X-Siphon-Handshake-Token header) and verifies
    it against the given signature (X-Siphon-Handshake-Signature header).
    If it passes verification it decodes the payload's JSON.

    If this is a development handshake (i.e. tying a username to an app ID)
    it returns a tuple containing two strings:

        (<username>, <app-id>)

    The given app ID is guaranteed to be owned by the user. Otherwise, if
    this is a production handshake it returns:

        (<submission-id>, <app-id>)

    Note that production handshakes are not tied to a user.
    """
    # Note that rsa.verify() detects the SHA-256 hashing method for us
    payload = base64.b64decode(unquote(token))
    signature_bytes = unquote_to_bytes(signature)
    ok = rsa.verify(payload, signature_bytes, get_public_key())
    if not ok:
        raise HandshakeError()
    try:
        obj = json.loads(payload.decode('utf8'))
        if 'action' in obj and obj['action'] != action:
            raise HandshakeError()
        if 'user_id' in obj and 'app_id' in obj:
            return (obj['user_id'], obj['app_id'])
        else:
            return (obj['submission_id'], obj['app_id'])
    except (ValueError, KeyError):
        raise HandshakeError()
def url2string(title: str,
               encodings: Union[str, List[str], Tuple[str, ...]] = 'utf-8'
               ) -> str:
    """Convert URL-encoded text to unicode using several encodings.

    Uses the first encoding that doesn't cause an error.

    :param title: URL-encoded character data to convert
    :param encodings: Encodings to attempt to use during conversion.

    :raise UnicodeError: Could not convert using any encoding.
    """
    if isinstance(encodings, str):
        encodings = [encodings]

    first_exception = None
    for enc in encodings:
        try:
            t = title.encode(enc)
            t = unquote_to_bytes(t)
        except UnicodeError as e:
            if not first_exception:
                first_exception = e
        else:
            return t.decode(enc)

    # Couldn't convert, raise the first exception
    raise first_exception
def _dummy_request(self, **kwargs):
    self.assertIn('body', kwargs)
    self.assertIn('uri', kwargs)
    self.assertIn('site', kwargs)
    if kwargs['body'] is None:
        # use uri and remove script path
        parameters = kwargs['uri']
        prefix = kwargs['site'].scriptpath() + '/api.php?'
        self.assertEqual(prefix, parameters[:len(prefix)])
        parameters = parameters[len(prefix):]
    else:
        parameters = kwargs['body']
    parameters = parameters.encode('ascii')  # it should be bytes anyway
    # Extract parameter data from the body, it's ugly but allows us
    # to verify that we actually test the right request
    parameters = [p.split(b'=', 1) for p in parameters.split(b'&')]
    keys = [p[0].decode('ascii') for p in parameters]
    values = [unquote_to_bytes(p[1]) for p in parameters]
    values = [v.decode(kwargs['site'].encoding()) for v in values]
    values = [v.replace('+', ' ') for v in values]
    values = [set(v.split('|')) for v in values]
    parameters = dict(zip(keys, values))

    if 'fake' not in parameters:
        return False  # do an actual request

    if self.assert_parameters:
        for param, value in self.assert_parameters.items():
            self.assertIn(param, parameters)
            if value is not None:
                if isinstance(value, UnicodeType):
                    value = value.split('|')
                self.assertLessEqual(set(value), parameters[param])
    return self.data
def url_unescape(value, encoding='utf-8', plus=True):
    """Decodes the given value from a URL.

    The argument may be either a byte or unicode string.

    If encoding is None, the result will be a byte string.  Otherwise,
    the result is a unicode string in the specified encoding.

    If ``plus`` is true (the default), plus signs will be interpreted
    as spaces (literal plus signs must be represented as "%2B").  This
    is appropriate for query strings and form-encoded values but not
    for the path component of a URL.  Note that this default is the
    reverse of Python's urllib module.

    .. versionadded:: 3.1
       The ``plus`` argument
    """
    if encoding is None:
        if plus:
            # unquote_to_bytes doesn't have a _plus variant
            value = to_basestring(value).replace('+', ' ')
        return urllib_parse.unquote_to_bytes(value)
    else:
        unquote = (urllib_parse.unquote_plus if plus
                   else urllib_parse.unquote)
        return unquote(to_basestring(value), encoding=encoding)
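
# Behaviour sketch for the encoding=None branch above, reproduced with the
# stdlib only: '+' is rewritten to a space *before* unquoting, so a literal
# plus must arrive as %2B.
from urllib.parse import unquote_to_bytes

assert unquote_to_bytes('a+b%2Bc'.replace('+', ' ')) == b'a b+c'
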
def filename(self):
    if self._filename is None:
        if self._content_disposition is None:
            value = self._headers.get(b'content-disposition', b'')
            self._content_disposition = cgi.parse_header(value.decode())

        _, params = self._content_disposition

        # NOTE(vytas): Supporting filename* as per RFC 5987, as that has
        #   been spotted in the wild, even though RFC 7578 forbids it.
        match = _FILENAME_STAR_RFC5987.match(params.get('filename*', ''))
        if match:
            charset, value = match.groups()
            try:
                self._filename = unquote_to_bytes(value).decode(charset)
            except (ValueError, LookupError) as err:
                raise MultipartParseError(
                    description='invalid text or charset: {}'.format(charset)
                ) from err
        else:
            value = params.get('filename')
            if value is None:
                return None
            self._filename = value

    return self._filename
def ismatch(event):
    # Check vhost
    if vhost is not None and getattr(event.createby, 'vhost', '') != vhost:
        return False
    # First parse the path
    # RFC said we should accept absolute path
    psplit = urlsplit(event.path)
    if psplit.path[:1] != b'/':
        # For security reasons, ignore unrecognized path
        return False
    if psplit.netloc and host is not None and host != psplit.netloc:
        # Maybe a proxy request, ignore it
        return False
    if getattr(event.createby, 'unquoteplus', True):
        realpath = unquote_plus_to_bytes(psplit.path)
    else:
        realpath = unquote_to_bytes(psplit.path)
    m = regm.match(realpath)
    if m is None:
        return False
    event.realpath = realpath
    event.querystring = psplit.query
    event.path_match = m
    return True
from binascii import a2b_base64, a2b_hex
from urllib.parse import unquote_to_bytes


def get_decoder(enc):
    if enc == 'base64':
        return a2b_base64
    elif enc == 'url':
        # '+' must become a space; rewrite it to %20 so unquote_to_bytes keeps it.
        return lambda x: unquote_to_bytes(x.replace(b'+', b'%20'))
    elif enc in ('lower_hex', 'upper_hex', 'hex'):
        return a2b_hex
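
# Quick check of the 'url' decoder above: rewriting b'+' to b'%20' first means
# the plus sign survives unquote_to_bytes() as a space.
url_decode = get_decoder('url')
assert url_decode(b'foo+bar%21') == b'foo bar!'
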
def _crawl(self, urls: list) -> list:
    """Crawls faster using requests_futures lib

    :param urls: urls to be crawled
    :return: sorted response objects
    """
    futures = list()
    unsorted_result = list()
    sorted_result = list()

    for i in range(0, len(urls)):  # initialize list
        sorted_result.append(i)

    ses = FuturesSession(session=self._session, max_workers=cons.WORKERS)
    for i in range(0, len(urls)):
        futures.append(ses.get(url=urls[i]))
        urls[i] = urls[i].encode('utf-8')

    for future in cf.as_completed(futures):
        result = future.result()
        unsorted_result.append(result)

    # Match each response back to its request URL; unquote_to_bytes undoes
    # the percent-encoding applied when the request was sent.
    for resp in unsorted_result:
        url = parse.unquote_to_bytes(resp.request.url)
        index = urls.index(url)
        sorted_result[index] = resp

    return sorted_result
def parse_qsl_to_bytes(query_string, keep_blank_values=False,
                       strict_parsing=False):
    """Parse a query given as a string argument.

    Arguments:

    query_string: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors.  If
        false (the default), errors are silently ignored.  If true,
        errors raise a ValueError exception.

    Returns a list, as G-d intended.
    """
    query_string, _coerce_result = _coerce_args(query_string)
    pairs = [s2 for s1 in query_string.split('&') for s2 in s1.split(';')]
    res = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nval = name_value.split('=', 1)
        if len(nval) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nval.append('')
            else:
                continue
        if len(nval[1]) or keep_blank_values:
            name = nval[0].replace('+', ' ')
            name = unquote_to_bytes(name)
            name = _coerce_result(name)
            value = nval[1].replace('+', ' ')
            value = unquote_to_bytes(value)
            value = _coerce_result(value)
            res.append((name, value))
    return res
def decompress():
    info = ("BZh91AY%26SY%94%3A%E2I%00%00%21%19%80P%81%11%00%AFg%9E%A0+%00hE%3DM%B5%23%D0%D4%D1%E2%8D%06%A9%FA"
            "%26S%D4%D3%21%A1%EAi7h%9B%9A%2B%BF%60%22%C5WX%E1%ADL%80%E8V%3C%C6%A8%DBH%2632%18%A8x%01%08%21%8DS"
            "%0B%C8%AF%96KO%CA2%B0%F1%BD%1Du%A0%86%05%92s%B0%92%C4Bc%F1w%24S%85%09%09C%AE%24%90")
    # '+' in form-encoded data means a space; restore it before unquoting.
    info = info.replace("+", "%20")
    info = parse.unquote_to_bytes(info)
    info = bz2.decompress(info).decode("utf-8")
    print(info)
def parse_data_uri(uri):
    """
    Parse a data: URI, returning a 3-tuple of media type, dictionary of media
    type parameters, and data.
    """
    if not isinstance(uri, bytes):
        uri = safe_url_string(uri).encode('ascii')

    try:
        scheme, uri = uri.split(b':', 1)
    except ValueError:
        raise ValueError("invalid URI")
    if scheme.lower() != b'data':
        raise ValueError("not a data URI")

    # RFC 3986 section 2.1 allows percent encoding to escape characters that
    # would be interpreted as delimiters, implying that actual delimiters
    # should not be percent-encoded.
    # Decoding before parsing will allow malformed URIs with percent-encoded
    # delimiters, but it makes parsing easier and should not affect
    # well-formed URIs, as the delimiters used in this URI scheme are not
    # allowed, percent-encoded or not, in tokens.
    uri = unquote_to_bytes(uri)

    media_type = "text/plain"
    media_type_params = {}

    m = _mediatype_pattern.match(uri)
    if m:
        media_type = m.group().decode()
        uri = uri[m.end():]
    else:
        media_type_params['charset'] = "US-ASCII"

    while True:
        m = _mediatype_parameter_pattern.match(uri)
        if m:
            attribute, value, value_quoted = m.groups()
            if value_quoted:
                # The replacement must be bytes too, since the pattern is bytes.
                value = re.sub(br'\\(.)', br'\1', value_quoted)
            media_type_params[attribute.decode()] = value.decode()
            uri = uri[m.end():]
        else:
            break

    try:
        is_base64, data = uri.split(b',', 1)
    except ValueError:
        raise ValueError("invalid data URI")
    if is_base64:
        if is_base64 != b";base64":
            raise ValueError("invalid data URI")
        data = base64.b64decode(data)

    return _ParseDataURIResult(media_type, media_type_params, data)
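
# Hypothetical call of parse_data_uri() above (safe_url_string, the regex
# patterns and the result tuple come from the surrounding module; this
# matches w3lib.url's implementation). Percent-escapes in the data part
# come back as raw bytes:
# >>> parse_data_uri(b'data:text/plain;charset=UTF-8,%E2%9C%93')
# _ParseDataURIResult(media_type='text/plain',
#                     media_type_params={'charset': 'UTF-8'},
#                     data=b'\xe2\x9c\x93')
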
def post(self, request):
    subject = u'日志报警系统'  # "log alarm system"
    mes = dict()
    otherMes = dict()
    Errors = list()
    Mails = dict()
    messages = json.loads(request.body.decode('utf-8'))
    for AppLogs in messages:
        AppName = AppLogs['appName']
        for Logs in AppLogs["errors"]:
            # %uXXXX is non-standard escaping; rewrite it so that
            # unicode-escape decoding can handle it.
            Log = Logs['message'].replace('%u', '\\u')
            decoded = parse.unquote_to_bytes(Log)  # renamed from 'bytes' to avoid shadowing the builtin
            decoded = decoded.decode('unicode-escape')
            # httpRef = "%s/app/kibana#/discover?_g=(refreshInterval:(pause:!t,value:0),time:(from:%s,mode:quick,to:now))&_a=(columns:!(_source),index:'22595e70-fb92-11e9-8a18-2b708bc20a0c',interval:auto,query:(language:lucene,query:'_id:%s'),sort:!('@timestamp',desc))" % (KIBANA_URL, KIBANA_DATE_TIME, Logs['id'])
            # httpRef = "%s/app/kibana#/doc/%s/%s/fluentd?id=%s&_g=(refreshInterval:(pause:!t,value:0),time:(from:%s,mode:quick,to:now))" % (KIBANA_URL, kibana_api.data, Logs['index'], Logs['id'], KIBANA_DATE_TIME)
            httpRef = "%s/app/kibana#/context/%s/%s/%s?_a=(columns:!(_source),filters:!(('$state':(store:appState),meta:(alias:!n,disabled:!f,index:'4fa5af10-0429-11ea-9ef8-019832b3e032',key:appname,negate:!f,params:(query:%s,type:phrase),type:phrase,value:%s),query:(match:(appname:(query:%s,type:phrase))))),predecessorCount:5,sort:!('@timestamp',desc),successorCount:5)&_g=(refreshInterval:(pause:!t,value:0),time:(from:%s,mode:quick,to:now))" % (
                KIBANA_URL, kibana_api.data, Logs['type'], Logs['id'],
                AppLogs['appName'], AppLogs['appName'], AppLogs['appName'],
                KIBANA_DATE_TIME)
            if len(decoded) > 150:
                bytesLimit = decoded[0:150] + "......"
                Errors.append({
                    'message': decoded,
                    'count': len(decoded),
                    'bytesLimit': bytesLimit,
                    'httpRef': httpRef
                })
            else:
                Errors.append({
                    'message': decoded,
                    'count': len(decoded),
                    'httpRef': httpRef
                })
        if AppName in EMAIL_TO:
            Mails[AppName] = EMAIL_TO[AppName]
            mes[AppName] = Errors
            if mes != {}:
                html_content = loader.render_to_string(
                    'logs-mail.html',
                    {'user': Mails[AppName]['username'], 'messages': mes})
                kibana_mail.send_html_mail(subject, html_content,
                                           Mails[AppName]['mailto'])
        else:
            Mails['other'] = EMAIL_TO['other']
            otherMes[AppName] = Errors
            if otherMes != {}:
                html_other_content = loader.render_to_string(
                    'logs-mail.html',
                    {'user': Mails['other']['username'], 'messages': otherMes})
                kibana_mail.send_html_mail(subject, html_other_content,
                                           Mails['other']['mailto'])
    code = {'code': 200}
    return HttpResponse(json.dumps(code), content_type="application/json")
async def read_form(self):
    if self._form is None:
        self._form = CIMultiDict()
        if (self.headers.content_type.type == 'application') and \
                (self.headers.content_type.subtype == 'x-www-form-urlencoded'):
            body = await self.read_body()
            for parameter in body.split(b'&'):
                # Split on the first '=' only; values may themselves contain '='.
                name, value = parameter.split(b'=', 1)
                self._form.add(
                    unquote_to_bytes(name).decode('utf-8'),
                    unquote_to_bytes(value).decode('utf-8'))
        elif (self.headers.content_type.type == 'multipart') and \
                (self.headers.content_type.subtype == 'form-data'):
            # TODO: replace with multifruits
            parser = BytesFeedParser(policy=HTTP, _factory=message_factory)
            parser.feed(b'Content-Type: %s\r\n\r\n'
                        % self.raw_headers['Content-Type'])
            async with self.open_body() as stream:
                while True:
                    data = await stream.read()
                    if not data:
                        break
                    parser.feed(data)
            message = parser.close()
            for part in message.walk():
                if part.get_content_type() != 'multipart/form-data':
                    params = dict(part.get_params(header='content-disposition'))
                    name = params.get('name')
                    if name:
                        payload = part.get_payload(decode=True)
                        if payload:
                            if part.get_content_type() == 'application/form-data':
                                self._form.add(name, payload.decode('utf-8'))
                            else:
                                self._form.add(
                                    name,
                                    RequestFilePart(params.get('filename'),
                                                    part.items(),
                                                    part.get_payload(decode=True)))
    return self._form
def call(self):
    if self.account.has_inbox():
        to = [
            s for s in
            [s.strip() for s in self.request.get_post('to').split(',')]
            if s
        ]
        subject = self.request.get_post('subject')
        body = self.request.get_post('body')
        attachments = self.request.get_post('attachments')
        attachment_data = self.request.get_post('_attachments')
        if type(attachments) is str:
            attachments = [attachments]
        if type(attachment_data) is str:
            attachment_data = [attachment_data]
        if attachments:
            print(attachments)
            print(attachment_data[0].replace('%', ' %').replace('\n', ' '))
        msg = mail.Email(self.account.email, *to, subject=subject)
        msg.write(body)
        if attachments and attachment_data:
            for i, a in enumerate(attachments):
                print(unquote(a), unquote_to_bytes(attachment_data[i]))
                msg._attach(unquote(a), unquote_to_bytes(attachment_data[i]))
        self.response.set_body('Done.')
        try:
            remote = mail.SMTPRemote(self.account.email,
                                     self.account.inbox.auth.password)
            remote.send(msg)
        except Exception as e:
            self.server.log('SMTP exception thrown - %s: %s'
                            % (str(e), str(e.args)))
            self.response.set_body('An error occurred.', append=False)
    else:
        self.response.send_error(
            403,
            'An inbox has not been generated for this account yet. Please '
            'contact Yovel or a high-level SGA representative if you believe '
            'this was an error.')
def parse_qsl_to_bytes(
    qs: str, keep_blank_values: bool = False
) -> List[Tuple[bytes, bytes]]:
    """Parse a query given as a string argument.

    Data are returned as a list of name, value pairs as bytes.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.
    """
    # This code is the same as Python3's parse_qsl()
    # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
    # except for the unquote(s, encoding, errors) calls replaced
    # with unquote_to_bytes(s)
    coerce_args = cast(Callable[..., Tuple[str, Callable]], _coerce_args)
    qs, _coerce_result = coerce_args(qs)
    pairs = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
    r = []
    for name_value in pairs:
        if not name_value:
            continue
        nv = name_value.split("=", 1)
        if len(nv) != 2:
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append("")
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name: StrOrBytes = nv[0].replace("+", " ")
            name = unquote_to_bytes(name)
            name = _coerce_result(name)
            value: StrOrBytes = nv[1].replace("+", " ")
            value = unquote_to_bytes(value)
            value = _coerce_result(value)
            r.append((cast(bytes, name), cast(bytes, value)))
    return r
def visits_for_titles(wiki_title_ptrs, wiki_visits_pagecounts_file, file_index,
                      wikicode, verbosity):
    '''
    Append page visits to the data objects. We expect several of these, so
    append each to an array, e.g. {'Q':Qid1, PGviews:[12,19,203]}.
    In the more recent files, missing values indicate <5 hits in that month,
    so we set these to 0. Having several values (one per month) allows us to
    trim off any that show an unusual spike.

    NB: see https://dumps.wikimedia.org/other/pagecounts-ez/ for the format.
    Pageviews totals files have a wikicode project name in ascii followed by
    .z for wikipedias (e.g. en.z), followed by a space, a uri-escaped title,
    a space, and an integer. The format is a very difficult one to parse, as
    it varies: there are multiple differently quoted versions of the same
    title, sometimes with spaces not underscores; unicode encoding sometimes
    fails; the bzip file sometimes appears truncated; etc. I've found that
    the best way to handle this is to unquote_to_bytes first (to remove
    uri-encoding), then convert to unicode. In fact, the encoding is unclear,
    and sometimes utf-8 encoding seems to fail, so we pass on any utf-8
    conversion errors. Hopefully this should only affect a few taxa where the
    page title has odd accents that have not been either uri-escaped, or
    properly encoded in utf-8.
    '''
    from urllib.parse import unquote_to_bytes
    used = 0
    match_project = (wikicode + ' ').encode()
    start_char = len(match_project)
    with bz2.open(wiki_visits_pagecounts_file, 'rb') as PAGECOUNTfile:
        try:
            problem_lines = []  # there are apparently some errors in the unicode dumps
            for n, line in enumerate(PAGECOUNTfile):
                if (n % 10000000 == 0) and verbosity:
                    print("Reading pagecount file of number of page views: {} entries read from file {} ({}): mem usage {} Mb".format(
                        n, file_index, wiki_visits_pagecounts_file.name,
                        memory_usage_resource()), file=sys.stderr)
                if line.startswith(match_project):
                    try:
                        info = line[start_char:].rstrip(b'\r\n\\rn').rsplit(b' ', 1)
                        # even though most titles should not have spaces, some can sneak in via uri escaping
                        title = unquote_to_bytes(info[0]).decode('UTF-8').replace(" ", "_")
                        # sometimes there are multiple encodings of the same title, with different visit numbers
                        wiki_title_ptrs[title]['PGviews'][file_index] = \
                            (wiki_title_ptrs[title]['PGviews'][file_index] or 0) + int(info[1])
                        used += 1
                    except UnicodeDecodeError:
                        problem_lines.append(str(n))
                    except KeyError:
                        pass  # title not in wiki_title_ptrs - this is expected for most entries
                    except ValueError as e:
                        if verbosity:
                            print(e, file=sys.stderr)
                            print(" Problem converting page view to an integer for {}".format(line), file=sys.stderr)
        except EOFError as e:  # this happens sometimes, dunno why
            if verbosity:
                print(" Problem with end of file: {}. Used {} entries (should be {}: {}%). Skipping to next".format(
                    e.args[-1], used, len(wiki_title_ptrs),
                    used / len(wiki_title_ptrs) * 100), file=sys.stderr)
    if len(problem_lines):
        if verbosity > 0:
            if verbosity <= 2:
                print(" Problem decoding {} lines, but these will be ones with strange accents etc., so should mostly not be taxa.".format(len(problem_lines)), file=sys.stderr)
            else:
                print(" Problem decoding certain lines: the following lines have been ignored:\n{}".format(" \n".join(problem_lines)), file=sys.stderr)
    if verbosity:
        print(" NB: of {} WikiData taxon entries, {} ({:.2f}%) have pageview data for {} in '{}'. mem usage {:.1f} Mb".format(
            len(wiki_title_ptrs), used, used / len(wiki_title_ptrs) * 100,
            wikicode,
            wiki_visits_pagecounts_file if isinstance(wiki_visits_pagecounts_file, str)
            else wiki_visits_pagecounts_file.name,
            memory_usage_resource()), file=sys.stderr)
def __iter__(self):
    for line in self._buf.split(b'\0'):
        if line:
            part = self.Part()
            start, end, text_data = line.split(b',')
            part.start = int(start)
            part.end = int(end)
            part.text_data = unquote_to_bytes(text_data)
            yield part
def parse_data_url(url):
    scheme, data = url.split(":", 1)
    assert scheme == "data", "unsupported scheme: " + scheme
    content_type, data = data.split(",", 1)
    # base64 urls might have a padding which might (should) be quoted:
    data = unquote_to_bytes(data)
    if content_type.endswith(";base64"):
        return binascii.a2b_base64(data), content_type[:-7] or None
    return data, content_type or None
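
# Self-check for parse_data_url() above (the binascii import is assumed):
assert parse_data_url('data:,A%20brief%20note') == (b'A brief note', None)
assert parse_data_url('data:text/plain;base64,aGk%3D') == (b'hi', 'text/plain')
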
def recognize(contours):
    # convert our nd list to encoded form data
    payload = "strokes=" + parse.quote(str(contours).encode("utf-8"))
    # convert data to bytes and submit
    response = request.urlopen(ENDPOINT, parse.unquote_to_bytes(payload))
    # clean up result
    return str(response.read().decode('UTF-8')).rstrip('\r\n')
def download(self, key, category, post, file, filename):
    l = self.l
    headers = {
        "User-Agent": "Mozilla AppleWebKit Chrome Safari"
    }
    headers = None  # NOTE: the custom headers above are currently disabled
    response = requests.get(file, headers=headers)
    if response.ok:
        content = response.content
        if not filename:
            headers = response.headers
            if "Content-disposition" in headers:
                content_disposition = headers["Content-disposition"]
                matched = self.CONTENT_REGEX.match(content_disposition)
                if matched:
                    filename = matched.group(1)
        if not filename:
            parsed = urlparse(file)
            paths = parsed.path.split('/')
            filename = paths[-1]
        if filename:
            try:
                filename = unquote_to_bytes(filename).decode("utf-8")
            except Exception:
                try:
                    filename = unquote_to_bytes(filename).decode("euc-kr")
                except Exception:
                    l.og("Error: Can't find filename.")
                    filename = Tools.md5(file)
        l.og("\tSave to: {}".format(filename))
        data = self.dm.find(post, filename)
        if not data:
            l.og("[Download] {}".format(filename))
            wo = open(os.sep.join((self.savedir, filename)), "wb")
            wo.write(content)
            wo.close()
            self.dm.add(key, category, post, file, filename)
            return True
    else:
        l.og("Error: requests.get()")
        l.og("\t{}".format(file))
def _unquotepath(path):
    for reserved in ('2f', '2F', '3f', '3F'):
        path = path.replace('%' + reserved, '%25' + reserved.upper())

    # standard lib's unquote() does not work for non-UTF-8
    # percent-escaped characters, they get lost.
    # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
    #
    # unquote_to_bytes() returns raw bytes instead
    return unquote_to_bytes(path)
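
# Behaviour sketch: %2F and %3F are re-escaped to %252F/%253F before unquoting,
# so path/query delimiters stay encoded while other escapes become raw bytes.
assert _unquotepath('a%2Fb%a3') == b'a%2Fb\xa3'
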
def _get_path(self, parsed):
    path = parsed.path
    # If there are parameters, add them
    if parsed.params:
        path += ";" + parsed.params
    path = unquote_to_bytes(path)
    # Replace the behavior where non-ASCII values in the WSGI environ are
    # arbitrarily decoded with ISO-8859-1.
    # Refs comment in `get_bytes_from_wsgi()`.
    return path.decode('iso-8859-1')
def createQrcode(url):
    data = {"device_num": "1", "device_id_list": ["koovox_02"]}
    data = json.dumps(data, ensure_ascii=False)
    request = req.Request(url, method='POST')
    request.add_header('Content-Type', 'application/json')
    request.add_header('encoding', 'utf-8')
    # unquote_to_bytes doubles as a str -> utf-8 bytes conversion for the body
    response = req.urlopen(request, parse.unquote_to_bytes(data))
    result = response.read()
    print(result)
    return result
def init_state(self):
    state = HgRepoHelper.state()
    self._branchmap = {
        unquote_to_bytes(branch): [unhexlify(h) for h in heads.split(b' ')]
        for line in state['branchmap'].splitlines()
        for branch, heads in (line.split(b' ', 1),)
    }
    self._heads = [unhexlify(h) for h in state['heads'][:-1].split(b' ')]
    self._bookmarks = self._decode_keys(state['bookmarks'])
def unescapeRepeatedly(input):
    '''Argument may be str or bytes. Returns bytes.'''
    if input is None:
        return None
    while True:
        un = unquote_to_bytes(input)
        if un == input:
            return input
        input = un
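
# Double-encoded input collapses one layer per pass until it is stable:
assert unescapeRepeatedly('%2541') == b'A'  # '%2541' -> b'%41' -> b'A'
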
def createMenu(token, menu):
    url = 'https://api.weixin.qq.com/cgi-bin/menu/create?access_token=%s' % token
    data = menu
    data = json.dumps(data, ensure_ascii=False)
    request = req.Request(url, method='POST')
    request.add_header('Content-Type', 'application/json')
    request.add_header('encoding', 'utf-8')
    response = req.urlopen(request, parse.unquote_to_bytes(data))
    result = response.read()
    print(result)
    return result
def url_unescape(value, encoding='utf-8'):
    """Decodes the given value from a URL.

    The argument may be either a byte or unicode string.

    If encoding is None, the result will be a byte string.  Otherwise,
    the result is a unicode string in the specified encoding.
    """
    if encoding is None:
        return urllib_parse.unquote_to_bytes(value)
    else:
        return urllib_parse.unquote_plus(to_basestring(value),
                                         encoding=encoding)
def _qs(req, encoding='utf-8', to_unicode=False):
    if req.method == 'POST':
        qs = req.body
    else:
        qs = req.url.partition('?')[2]
    if six.PY2:
        uqs = unquote(to_native_str(qs, encoding))
    elif six.PY3:
        uqs = unquote_to_bytes(qs)
        if to_unicode:
            uqs = uqs.decode(encoding)
    return parse_qs(uqs, True)
def do_GET(self):
    sp = re.split(r'\?|&|=', self.path)  # raw string so '\?' is not an invalid escape
    transmission.add_info_hash(
        hexlify(
            unquote_to_bytes(sp[sp.index('info_hash') + 1])
        ).decode()
    )
    self.send_response(200)
    self.send_header("Content-type", "text/plain")
    self.end_headers()
    self.wfile.write(b'd8:intervali3600e5:peers6:\x7f\x00\x00\x01\x00\x00e')
def chainFollow(nothingValue):
    seen = ''
    print("Working...")
    while True:
        # unquote_to_bytes implies Python 3, where urlopen lives in urllib.request
        response = urllib.request.urlopen(site + nothingValue)
        cookie = re.findall("(info=)(.*?)(;)", dict(response.info())["Set-Cookie"])
        data = str(response.read())
        found = re.findall("(next busynothing is )([0-9]+)", data)
        seen += cookie[0][1]
        if len(found) > 0:
            nothingValue = found[0][1]
        else:
            print(bz2.decompress(unquote_to_bytes(seen.replace("+", " "))).decode())
            break
def deviceAuth(url):
    data = {"device_num": "1",
            "device_list": [{"id": "koovox_02",
                             "mac": "00025b00ff02",
                             "connect_protocol": "3",
                             "auth_key": "",
                             "close_strategy": "1",
                             "conn_strategy": "1",
                             "crypt_method": "0",
                             "auth_ver": "0",
                             "manu_mac_pos": "-1",
                             "ser_mac_pos": "-2"}],
            "op_type": "0"}
    data = json.dumps(data, ensure_ascii=False)
    request = req.Request(url, method='POST')
    request.add_header('Content-Type', 'application/json')
    request.add_header('encoding', 'utf-8')
    response = req.urlopen(request, parse.unquote_to_bytes(data))
    result = response.read()
    print(result)
    return result
def _unquotepath(path):
    for reserved in ('2f', '2F', '3f', '3F'):
        path = path.replace('%' + reserved, '%25' + reserved.upper())

    if six.PY2:
        # in Python 2, '%a3' becomes '\xa3', which is what we want
        return unquote(path)
    else:
        # in Python 3,
        # standard lib's unquote() does not work for non-UTF-8
        # percent-escaped characters, they get lost.
        # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
        #
        # unquote_to_bytes() returns raw bytes instead
        return unquote_to_bytes(path)
def uri_to_iri(uri):
    """
    Convert a Uniform Resource Identifier (URI) into an Internationalized
    Resource Identifier (IRI).

    This is the algorithm from section 3.2 of RFC 3987.

    Takes a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    unicode containing the decoded result (e.g. '/I \xe2\x99\xa5 Django/').
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    iri = unquote_to_bytes(uri)
    return repercent_broken_unicode(iri).decode('utf-8')
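
# Equivalent stdlib-only sketch (force_bytes and repercent_broken_unicode are
# Django helpers; the latter re-escapes any bytes that are not valid UTF-8
# instead of dropping them):
from urllib.parse import unquote_to_bytes

assert unquote_to_bytes(b'/I%20%E2%99%A5%20Django/').decode('utf-8') \
    == '/I \u2665 Django/'
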
def url_unescape(value, encoding='utf-8', plus=True):
    """Decodes the given value from a URL.

    :param value: url to be unescaped
    :param encoding: url encoding
    :param plus: whether plus signs should be treated as spaces (default True)
    :return: the unescaped url
    """
    if encoding is None:
        if plus:
            # unquote_to_bytes doesn't have a _plus variant
            value = to_basestring(value).replace('+', ' ')
        return urllib_parser.unquote_to_bytes(value)
    else:
        unquote = (urllib_parser.unquote_plus if plus
                   else urllib_parser.unquote)
        return unquote(to_basestring(value), encoding=encoding)
def convlocation(location):
    match = re.match(r"file://(\w+/[A-Z]:)(/.+)", location)
    if not match:
        sys.exit("Invalid location: " + location)
    drive = match.group(1)
    path = match.group(2)
    if drive not in cfg["driveMapping"]:
        sys.exit("Unknown drive: %s" % drive)
    mapped_drive = cfg["driveMapping"][drive]
    if mapped_drive is None:
        return None
    # Conversion UTF-8 NFD (Mac) --> UTF-8 NFC (Linux/Windows/W3C/...)
    path = quote(unicodedata.normalize('NFC',
                                       unquote_to_bytes(path).decode("utf-8")),
                 "/!'(),&~+$")
    if path[-1] == '/':
        path = path[0:-1]
    return "file://" + mapped_drive + path
def get_environ(self):
    env = self.server.get_environ()
    env['REQUEST_METHOD'] = self.command
    env['SCRIPT_NAME'] = ''

    if '?' in self.path:
        path, query = self.path.split('?', 1)
    else:
        path, query = self.path, ''
    env['PATH_INFO'] = unquote(path) if not PY3 else to_local(unquote_to_bytes(path))
    env['QUERY_STRING'] = query

    if self.headers.typeheader is not None:
        env['CONTENT_TYPE'] = self.headers.typeheader

    length = self.headers.getheader('content-length')
    if length:
        env['CONTENT_LENGTH'] = length
    env['SERVER_PROTOCOL'] = self.request_version

    client_address = self.client_address
    if isinstance(client_address, tuple):
        env['REMOTE_ADDR'] = str(client_address[0])
        env['REMOTE_PORT'] = str(client_address[1])

    for key, value in self._headers():
        if key in env:
            if 'COOKIE' in key:
                env[key] += '; ' + value
            else:
                env[key] += ',' + value
        else:
            env[key] = value

    if env.get('HTTP_EXPECT') == '100-continue':
        socket = self.socket
    else:
        socket = None
    chunked = env.get('HTTP_TRANSFER_ENCODING', '').lower() == 'chunked'
    self.wsgi_input = Input(self.rfile, self.content_length,
                            socket=socket, chunked_input=chunked)
    env['wsgi.input'] = self.wsgi_input
    return env
def url_unescape(value, encoding='utf-8', plus=True):
    """Decodes the given value from a URL.

    The argument may be either a byte or unicode string.

    If encoding is None, the result will be a byte string.  Otherwise,
    the result is a unicode string in the specified encoding.

    If ``plus`` is true (the default), plus signs will be interpreted
    as spaces (literal plus signs must be represented as "%2B").  This
    is appropriate for query strings and form-encoded values but not
    for the path component of a URL.  Note that this default is the
    reverse of Python's urllib module.

    .. versionadded:: 3.1
       The ``plus`` argument
    """
    if encoding is None:
        if plus:
            # unquote_to_bytes doesn't have a _plus variant
            value = to_basestring(value).replace('+', ' ')
        return urllib_parse.unquote_to_bytes(value)
    else:
        unquote = (urllib_parse.unquote_plus if plus
                   else urllib_parse.unquote)
        return unquote(to_basestring(value), encoding=encoding)
def unquote_bytes_to_wsgi(bytestring):
    return unquote_to_bytes(bytestring)
def unquote_bytes_to_wsgi(bytestring):
    # Decode with latin-1 per PEP 3333: every byte maps to one code point,
    # so the original bytes can always be recovered by re-encoding.
    return unquote_to_bytes(bytestring).decode('latin-1')
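
# PEP 3333 sketch: WSGI carries paths as latin-1 strings, so the decode above
# is lossless and round-trips:
wsgi_path = unquote_bytes_to_wsgi(b'/caf%C3%A9')
assert wsgi_path == '/caf\xc3\xa9'
assert wsgi_path.encode('latin-1') == b'/caf\xc3\xa9'
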
def uri_to_path(uri):
    """Convert URI to file path."""
    # TODO: decide on Unicode vs. bytes for URIs
    return unquote_to_bytes(urlsplit(uri).path)
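
# Usage sketch for uri_to_path() above; note the result is bytes, not str:
assert uri_to_path('file:///tmp/caf%C3%A9.flac') == b'/tmp/caf\xc3\xa9.flac'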