def add(self, url, content_type, label=None, thumb=''):
    """Insert a playback-history row, evicting the oldest row for this
    addon once the table grows past the configured size limit.

    :param url: item url
    :param content_type: media content type string
    :param label: display label; defaults to the url itself
    :param thumb: thumbnail url (percent-encoded; decoded before storing)
    """
    if label is None:
        label = url
    label = unquote(label)
    thumb = unquote(thumb)
    execute = 'INSERT INTO {0!s} (addon_id, url, content_type, label, thumbnail) VALUES (?, ?, ?, ?, ?)'.format(
        self.TABLE)
    inserted = DATABASE.execute(
        execute, (self.ID, str(url), str(content_type), label, thumb))
    if inserted == 1:
        execute = 'SELECT COUNT(*) FROM {0!s} WHERE addon_id=?'.format(
            self.TABLE)
        result = int(DATABASE.fetch(execute, (self.ID,))[0][0])
        if result > self.size_limit():
            # over the cap: drop the oldest row (smallest ROWID)
            execute = 'DELETE FROM {0!s} WHERE ROWID = (SELECT MIN(ROWID) FROM {0!s}) AND addon_id=?'.format(
                self.TABLE)
            result, rowcount = DATABASE.execute_w_rowcount(
                execute, (self.ID,))
            if rowcount < 1:
                # Targeted delete failed: clear all rows for this addon.
                # BUGFIX: original used 'DELETE * FROM ...' which is not
                # valid SQL (DELETE takes no column list) and always failed.
                execute = 'DELETE FROM {0!s} WHERE addon_id=?'.format(
                    self.TABLE)
                result, rowcount = DATABASE.execute_w_rowcount(
                    execute, (self.ID,))
                if rowcount < 1:
                    # last resort: rebuild the table from scratch
                    result = DATABASE.execute('DROP TABLE {0!s}'.format(
                        self.TABLE))
                    self.vacuum()
                    self.create_table()
            if rowcount > 0:
                self.vacuum()
def get_media_url(self, host, media_id):
    """Resolve a playable stream url for a YouTube-hosted video.

    First tries to scrape the legacy ``url_encoded_fmt_stream_map`` from
    the watch page; on any failure falls back to the youtube_resolver
    addon (when importable) or to the plugin.video.youtube play url.

    :raises ResolverError: when no playable stream can be found
    """
    try:
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url, headers=self.headers).content
        # legacy stream map: comma-separated, url-encoded stream descriptors
        stream_map = urllib_parse.unquote(
            re.findall('url_encoded_fmt_stream_map=([^&]+)', html)[0])
        streams = stream_map.split(',')
        sources = []
        # keep only MP4 streams ('video%2Fmp4' is the url-encoded mime type)
        streams_mp4 = [item for item in streams if 'video%2Fmp4' in item]
        for stream in streams_mp4:
            quality = re.findall('quality=([^&]+)', stream)[0]
            url = re.findall('url=([^&]+)', stream)[0]
            sources.append((quality, urllib_parse.unquote(url)))
        if sources:
            return helpers.pick_source(sources)
    except:
        # scraping failed (page layout changed, regex missed, etc.):
        # fall back to the dedicated resolver / youtube addon
        if youtube_resolver is None:
            return 'plugin://plugin.video.youtube/play/?video_id=' + media_id
        else:
            streams = youtube_resolver.resolve(media_id)
            # skip DASH manifests; pick_source expects direct urls
            streams_no_dash = [
                item for item in streams if item['container'] != 'mpd'
            ]
            stream_tuples = [(item['title'], item['url'])
                             for item in streams_no_dash]
            if stream_tuples:
                return helpers.pick_source(stream_tuples)
    raise ResolverError('Video not found')
def __check_for_new_url(url):
    """Rewrite redirector/wrapper urls (google cache pages, reddit
    out-links, youtu.be short links) to the underlying target url.

    Returns ``url`` unchanged when no rewrite applies.
    """
    if 'google' in url:
        # google cache pages embed the real url after 'cache:<token>:'
        try:
            matches = re.findall(r'cache:[a-zA-Z0-9_\-]+:(.+?)\+&', url)
            return unquote(matches[-1])
        except:
            # otherwise look for a url= parameter on a google domain
            try:
                matches = re.findall(
                    r'google[a-z]*\.[a-z]+/.*url=(.+?)[&$]', url)
                return unquote(matches[-1])
            except:
                pass
    if 'reddit' in url:
        # out.reddit.com wraps the destination in a url= parameter
        try:
            matches = re.findall(
                r'http[s]?://out\.reddit\.com/.*?url=(.+?)&', url)
            return unquote(matches[-1])
        except:
            pass
    if 'youtu.be' in url:
        # expand youtu.be short links to full watch urls
        short = re.search(
            r'http[s]*://youtu\.be/(?P<video_id>[a-zA-Z0-9_\-]{11})', url)
        if short:
            return 'https://www.youtube.com/watch?v=%s' % short.group('video_id')
    return url
def _get_url(server, mode, url):
    """Map a text-mode MODES constant to the matching Plex section url.

    Scans the server's sections for the first movie/show section and
    builds the url for the requested listing (all / onDeck /
    recentlyAdded / newest).  Returns ``url`` unchanged when no video
    section or mode matches.
    """
    sections = server.get_sections()
    for section in sections:
        is_video = section.is_movie() or section.is_show()
        if is_video:
            if mode in [MODES.TXT_TVSHOWS, MODES.TXT_MOVIES]:
                # full listing rooted at the section's own path
                url = server.join_url(server.get_url_location(),
                                      section.get_path(), 'all')
                break
            if mode in [MODES.TXT_MOVIES_ON_DECK, MODES.TXT_TVSHOWS_ON_DECK]:
                url = server.join_url(server.get_url_location(),
                                      unquote(url), 'onDeck')
                break
            if mode in [
                MODES.TXT_MOVIES_RECENT_ADDED, MODES.TXT_TVSHOWS_RECENT_ADDED
            ]:
                url = server.join_url(server.get_url_location(),
                                      unquote(url), 'recentlyAdded')
                break
            if mode in [
                MODES.TXT_MOVIES_RECENT_RELEASE, MODES.TXT_TVSHOWS_RECENT_AIRED
            ]:
                url = server.join_url(server.get_url_location(),
                                      unquote(url), 'newest')
                break
    return url
def extract(self, carrier):
    """Extract a SpanContext from a propagation carrier (header mapping).

    :param carrier: dict-like collection of header key/values
    :raises InvalidCarrierException: carrier is not a collection
    :raises SpanContextCorruptedException: baggage present without a
        trace context
    :returns: a SpanContext, a debug-only context, or None
    """
    if not hasattr(carrier, 'items'):
        raise InvalidCarrierException('carrier not a collection')
    trace_id, span_id, parent_id, flags = None, None, None, None
    baggage = None
    debug_id = None
    for key, value in six.iteritems(carrier):
        uc_key = key.lower()  # header names are case-insensitive
        if uc_key == self.trace_id_header:
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            trace_id, span_id, parent_id, flags = \
                span_context_from_string(value)
        elif uc_key.startswith(self.baggage_prefix):
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            # strip the baggage prefix to recover the attribute name
            attr_key = key[self.prefix_length:]
            if baggage is None:
                baggage = {attr_key.lower(): value}
            else:
                baggage[attr_key.lower()] = value
        elif uc_key == self.debug_id_header:
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            debug_id = value
    if not trace_id and baggage:
        raise SpanContextCorruptedException('baggage without trace ctx')
    if not trace_id:
        if debug_id is not None:
            # no trace yet, but the caller requested a debug trace
            return SpanContext.with_debug_id(debug_id=debug_id)
        return None
    return SpanContext(trace_id=trace_id, span_id=span_id,
                       parent_id=parent_id, flags=flags,
                       baggage=baggage)
def play(path, player=True, history=None, thumb='', title=''):
    """Decode the percent-encoded path/thumb/title and hand off to
    playback.play_this."""
    from .playback import play_this
    decoded_path = unquote(path)
    play_this(decoded_path,
              player=player,
              history=history,
              thumbnail=unquote(thumb),
              title=unquote(title))
def get_new_item(player=True):
    """Prompt the user for a new playback item and play it when one is
    entered; does nothing on empty input."""
    entered = PlayHistory().get_input()
    if not entered:
        return
    from .playback import play_this
    decoded = unquote(entered)
    # the decoded input doubles as the display title
    play_this(decoded, title=decoded, player=player)
def unquote(cls, value):
    """ Python 2 and 3 compat layer for utf-8 unquoting """
    # NOTE: the bare `unquote` below resolves to the module-level urllib
    # helper, not this classmethod.
    if six.PY2:
        # py2: unquote operates on bytes; decode the result as utf-8
        return unquote(value).decode("utf8")
    else:
        # py3: value is expected to be ascii-encoded bytes
        # (assumption inferred from the .decode call — confirm at call sites)
        return unquote(value.decode("ascii"))
def get_decode(str, reg=None):
    """Decode an obfuscated string: optionally extract it with ``reg``,
    url-unquote it, shift every character down by the numeric value of
    the last character, then unquote once more.

    :param str: encoded payload (name kept for caller compatibility,
        although it shadows the builtin)
    :param reg: optional regex whose first match replaces ``str``
    :returns: the decoded string
    """
    if reg:
        str = re.findall(reg, str)[0]
    # drop the trailing character, then url-decode the remainder
    s1 = urllib_parse.unquote(str[0:len(str) - 1])
    # BUGFIX: the shift amount must be numeric; the original computed
    # `ord(s1[i]) - s1[len(s1) - 1]`, subtracting a str from an int,
    # which raises TypeError on every call that reaches the loop.
    shift = int(s1[len(s1) - 1])
    t = ''.join(chr(ord(ch) - shift) for ch in s1)
    t = urllib_parse.unquote(t)
    return t
def get_user_details(self, response):
    """Return user details from Mail.ru request."""
    first = unquote(response['first_name'])
    last = unquote(response['last_name'])
    fullname, first_name, last_name = self.get_user_names(first_name=first,
                                                          last_name=last)
    details = {
        'username': unquote(response['nick']),
        'email': unquote(response['email']),
        'fullname': fullname,
        'first_name': first_name,
        'last_name': last_name,
    }
    return details
def get_user_details(self, response):
    """Return user details from Mail.ru request"""
    names = self.get_user_names(
        first_name=unquote(response['first_name']),
        last_name=unquote(response['last_name']))
    fullname, first_name, last_name = names
    return {
        'username': unquote(response['nick']),
        'email': unquote(response['email']),
        'fullname': fullname,
        'first_name': first_name,
        'last_name': last_name,
    }
def get_user_details(self, response):
    """Return standard user details built from the provider response."""
    decoded = {key: unquote(response[key])
               for key in ('name', 'first_name', 'last_name')}
    fullname, first_name, last_name = self.get_user_names(
        fullname=decoded['name'],
        first_name=decoded['first_name'],
        last_name=decoded['last_name'])
    return {
        'username': response['uid'],
        'email': '',
        'fullname': fullname,
        'first_name': first_name,
        'last_name': last_name
    }
def get_user_details(self, response):
    """Return user details from Odnoklassniki request"""
    fullname, first_name, last_name = self.get_user_names(
        fullname=unquote(response['name']),
        first_name=unquote(response['first_name']),
        last_name=unquote(response['last_name']))
    details = dict(username=response['uid'],
                   email=response.get('email', ''),
                   fullname=fullname,
                   first_name=first_name,
                   last_name=last_name)
    return details
def get_user_details(self, response):
    """Build the standard user-details dict from the raw response."""
    fullname, first_name, last_name = self.get_user_names(
        fullname=unquote(response['name']),
        first_name=unquote(response['first_name']),
        last_name=unquote(response['last_name']),
    )
    result = {'username': response['uid'], 'email': ''}
    result['fullname'] = fullname
    result['first_name'] = first_name
    result['last_name'] = last_name
    return result
def get_user_details(self, response):
    """Return user details from Odnoklassniki request"""
    name_args = dict(fullname=unquote(response['name']),
                     first_name=unquote(response['first_name']),
                     last_name=unquote(response['last_name']))
    fullname, first_name, last_name = self.get_user_names(**name_args)
    return {'username': response['uid'],
            'email': response.get('email', ''),
            'fullname': fullname,
            'first_name': first_name,
            'last_name': last_name}
def unquote(cls, value, plus_as_space=False):
    """ Python 2 and 3 compat layer for utf-8 unquoting """
    # NOTE: `unquote`/`unquote_plus` below are the module-level urllib
    # helpers, not this classmethod.
    if six.PY2:
        # py2: both helpers operate on bytes; decode the result as utf-8
        if plus_as_space:
            # '+' is treated as an encoded space
            return unquote_plus(value).decode("utf8")
        else:
            return unquote(value).decode("utf8")
    else:
        # py3: value is expected to be ascii-encoded bytes
        # (assumption inferred from the .decode call — confirm at call sites)
        if plus_as_space:
            return unquote_plus(value.decode("ascii"))
        else:
            return unquote(value.decode("ascii"))
def _parse_gdocs(self, html):
    """Parse a Google Docs/Drive player page into (quality, url) pairs.

    :param html: raw player page html
    :raises ResolverError: when the page reports an error reason
    :returns: list of (quality_label, direct_url) tuples
    """
    urls = []
    if 'error' in html:
        # the page embeds a human-readable reason= parameter
        reason = urllib_parse.unquote_plus(re.findall('reason=([^&]+)', html)[0])
        raise ResolverError(reason)
    # fmt_stream_map is a comma-separated list of itag|url entries
    value = urllib_parse.unquote(re.findall('fmt_stream_map=([^&]+)', html)[0])
    items = value.split(',')
    for item in items:
        _source_itag, source_url = item.split('|')
        if isinstance(source_url, six.text_type) and six.PY2:  # @big change
            # py2: normalize escaped-unicode text back to utf-8 bytes
            source_url = source_url.decode('unicode_escape').encode('utf-8')
        # map the numeric itag to a readable quality label
        quality = self.itag_map.get(_source_itag,
                                    'Unknown Quality [%s]' % _source_itag)
        source_url = urllib_parse.unquote(source_url)
        urls.append((quality, source_url))
    return urls
def change_thumb(self, row_id, thumb):
    """Update the stored thumbnail for one history row; notify on failure."""
    statement = 'UPDATE {0!s} SET thumbnail=? WHERE id=? AND addon_id=?'.format(
        self.TABLE)
    result = DATABASE.execute(statement, (unquote(thumb), row_id, self.ID))
    if result == 1:
        return result
    # the UPDATE did not touch exactly one row: surface a warning
    kodi.notify(msg=kodi.i18n('thumbchange_failed'), sound=False)
    return result
def ulib(string, enc=False):
    """Percent-encode (``enc=True``) or percent-decode (``enc=False``)
    a string; on any failure the input is returned untouched."""
    try:
        if enc:
            return urllib_parse.quote(string)
        return urllib_parse.unquote(string)
    except:
        return string
def translate_path(self, path):
    """Translate a /-separated PATH to the local filename syntax.

    Components that mean special things to the local file system
    (e.g. drive or directory names) are ignored.  (XXX They should
    probably be diagnosed.)

    """
    import posixpath
    from six.moves.urllib_parse import unquote
    # abandon query parameters
    path = path.split('?', 1)[0]
    path = path.split('#', 1)[0]
    # Don't forget explicit trailing slash when normalizing. Issue17324
    trailing_slash = path.rstrip().endswith('/')
    path = posixpath.normpath(unquote(path))
    words = filter(None, path.split('/'))
    # resolve relative to the configured root directory
    path = to_abs_path()
    for word in words:
        if os.path.dirname(word) or word in (os.curdir, os.pardir):
            # Ignore components that are not a simple file/directory name
            continue
        path = os.path.join(path, word)
    if trailing_slash:
        # restore the trailing slash dropped by normpath
        path += '/'
    return path
def test_store_with_file_pattern(self, tracker, node, expected_file_path):
    """Storing a tracked node must write its (quoted) nodeid to the
    expected file path."""
    tracker.add(node)
    tracker.store()
    assert os.path.isfile(expected_file_path), "File not exist"
    with open(expected_file_path) as file_content:
        content = file_content.read().strip()
        # stored ids are url-quoted on disk; compare the decoded form
        assert urllib_parse.unquote(content) == node.nodeid
def List(url):
    """Scrape a video listing page: emit a download link for every item
    and a 'Next Page' folder entry when pagination is present."""
    listhtml = utils.getHtml(url, site.url)
    # narrow the html to the listing block when the expected wrapper exists
    r = re.compile(r'<title>.+?(?:"list-albums"|"box\stag)',
                   re.DOTALL | re.IGNORECASE).search(listhtml)
    if r:
        listhtml = r.group(0)
    match = re.compile(r'class="item.+?href="([^"]+).+?nal="([^"]+).+?le">\s*([^<]+).+?on">([^<]+)',
                       re.DOTALL | re.IGNORECASE).findall(listhtml)
    for videopage, img, name, duration in match:
        name = utils.cleantext(name.strip())
        site.add_download_link(name, videopage, 'Playvid', img, name,
                               duration=duration)
    nextp = re.compile(r'class="next"><a\s*href="([^"]+)',
                       re.DOTALL | re.IGNORECASE).search(listhtml)
    if nextp:
        nextp = nextp.group(1)
        if nextp.startswith('#'):
            # ajax pagination: rebuild the async get_block query string
            block, pars = re.compile(r'class="next">.+?block-id="([^"]+).+?parameters="([^"]+)',
                                     re.DOTALL | re.IGNORECASE).findall(listhtml)[0]
            # page number is embedded in the 'from' parameter
            pno = re.compile(r'from[^\d]+(\d+)', re.IGNORECASE).findall(pars)[0]
            query = {'mode': 'async', 'function': 'get_block', 'block_id': block}
            for par in pars.split(';'):
                par1, par2 = par.split(':')
                if '+' in par1:
                    # 'a+b:v' assigns the same value to several keys
                    for spar in par1.split('+'):
                        query.update({spar: par2})
                else:
                    query.update({par1: urllib_parse.unquote(par2)})
            nextp = "{0}?{1}".format(url.split('?')[0],
                                     urllib_parse.urlencode(query))
        else:
            # plain link pagination; make the url absolute when needed
            nextp = site.url[:-1] + nextp if 'http' not in nextp else nextp
            pno = nextp.split('/')[-2]
        site.add_dir('Next Page... ({0})'.format(pno), nextp, 'List',
                     site.img_next)
    utils.eod()
def show_ecnf_isoclass(nf, conductor_label, class_label):
    """Render the page for one elliptic curve isogeny class over a
    number field, identified by (field, conductor, class) labels."""
    conductor_label = unquote(conductor_label)
    # normalize labels for imaginary quadratic fields
    conductor_label = convert_IQF_label(nf, conductor_label)
    try:
        nf_label, nf_pretty = get_nf_info(nf)
    except ValueError:
        return search_input_error()
    label = "-".join([nf_label, conductor_label, class_label])
    full_class_label = "-".join([conductor_label, class_label])
    cl = ECNF_isoclass.by_label(label)
    bread = [("Elliptic curves", url_for(".index"))]
    if not isinstance(cl, ECNF_isoclass):
        # lookup failed: by_label returned an error value
        info = {'query': {},
                'err': 'No elliptic curve isogeny class in the database has label %s.' % label}
        return search_input_error(info, bread)
    title = "Elliptic curve isogeny class %s over number field %s" % (
        full_class_label, cl.field_name)
    bread.append((nf_pretty, url_for(".show_ecnf1", nf=nf)))
    bread.append((conductor_label,
                  url_for(".show_ecnf_conductor", nf=nf_label,
                          conductor_label=conductor_label)))
    bread.append((class_label,
                  url_for(".show_ecnf_isoclass", nf=nf_label,
                          conductor_label=quote(conductor_label),
                          class_label=class_label)))
    return render_template("ecnf-isoclass.html",
                           credit=ecnf_credit,
                           title=title,
                           bread=bread,
                           cl=cl,
                           properties=cl.properties,
                           friends=cl.friends,
                           learnmore=learnmore_list())
def get_media_url(self, host, media_id):
    """Resolve a DrTuber video to a direct stream url.

    Fetches the embed page, rebuilds the player_config query (including
    the salted md5 ``pkey``), then scrapes the CDATA video urls.

    :raises ResolverError: when no playable source is found
    """
    web_url = self.get_url(host, media_id)
    headers = {'User-Agent': common.RAND_UA}
    html = self.net.http_GET(web_url, headers=headers).content
    try:
        params = "".join([
            x.replace("' + '", "")
            for x in self.between(html, "params += '", "';")
        ])
        vkey = params.split('=')[-1]
        m = hashlib.md5()
        # BUGFIX: hashlib.update() requires bytes; the original passed a
        # str, which raises TypeError on Python 3.
        m.update((vkey + 'PT6l13umqV8K827').encode('utf-8'))
        params += '&pkey=%s' % m.hexdigest()
        params = urllib_parse.unquote(params)
        url = 'http://www.drtuber.com/player_config/?' + params
        sources_html = self.net.http_GET(url, headers=headers).content
        if sources_html:
            sources = helpers.scrape_sources(
                sources_html,
                patterns=[r"""video_file>\<\!\[CDATA\[(?P<url>[^\]]+)"""])
            if sources:
                return helpers.pick_source(
                    sources) + helpers.append_headers(headers)
        raise ResolverError('File not found')
    except:
        raise ResolverError('File not found')
def unescape(self, s):
    """Decode a JavaScript escape()-style string (%xx and %uxxxx forms).

    %uxxxx pairs are emitted low-byte-first (byte-swapped); inputs
    longer than 16 chars are recorded as potential shellcode.
    """
    i = 0
    sc = list()
    if len(s) > 16:
        # long escaped blobs are suspicious: log them for analysis
        log.ThugLogging.shellcodes.add(s)
    # %xx format
    if '%' in s and '%u' not in s:
        return urllib.unquote(s)
    # %uxxxx format
    while i < len(s):
        if s[i] == '"':
            # stray quotes are skipped entirely
            i += 1
            continue
        if s[i] == '%' and (i + 1) < len(s) and s[i + 1] == 'u':
            if (i + 6) <= len(s):
                # full %uXXYY escape: append YY then XX (byte-swapped)
                currchar = int(s[i + 2:i + 4], 16)
                nextchar = int(s[i + 4:i + 6], 16)
                sc.append(chr(nextchar))
                sc.append(chr(currchar))
                i += 6
            elif (i + 3) <= len(s):
                # truncated escape: only one byte available
                currchar = int(s[i + 2:i + 4], 16)
                sc.append(chr(currchar))
                i += 3
        else:
            sc.append(s[i])
            i += 1
    return ''.join(sc)
def get_media_url(self, host, media_id):
    """Resolve a VeeHD video url (a configured account is required).

    :raises ResolverError: when credentials are missing or no stream
        link can be found
    """
    if not self.get_setting('login') == 'true' or not (
            self.get_setting('username') and self.get_setting('password')):
        raise ResolverError('VeeHD requires a username & password')
    web_url = self.get_url(host, media_id)
    html = self.net.http_GET(web_url).content
    # two possible playeriframe's: stream and download
    for match in re.finditer(r'playeriframe.+?src\s*:\s*"([^"]+)', html):
        player_url = 'http://%s%s' % (host, match.group(1))
        html = self.net.http_GET(player_url).content
        # if the player html contains an iframe the iframe url has to be
        # gotten and then the player_url tried again
        r = re.search('<iframe.*?src="([^"]+)', html)
        if r:
            frame_url = 'http://%s%s' % (host, r.group(1))
            # hitting the frame first appears to prime server-side state
            # — confirm against the site's behavior
            self.net.http_GET(frame_url)
            html = self.net.http_GET(player_url).content
        patterns = [
            r'"video/divx"\s+src="([^"]+)',
            r'"url"\s*:\s*"([^"]+)',
            'href="([^"]+(?:mp4|avi))'
        ]
        for pattern in patterns:
            r = re.search(pattern, html)
            if r:
                stream_url = urllib_parse.unquote(r.group(1))
                return stream_url
    raise ResolverError('File Not Found or Removed')
def resolve_fragment(self, document, fragment):
    """
    Resolve a ``fragment`` within the referenced ``document``.

    :param document: The referrant document.
    :type document: dict
    :param fragment: A URI fragment to resolve within it
    :type fragment: str
    """
    stripped = fragment.lstrip('/')
    parts = unquote(stripped).split('/') if stripped else []
    for token in parts:
        # JSON-pointer escapes: ~1 -> '/', then ~0 -> '~' (order matters)
        token = token.replace('~1', '/').replace('~0', '~')
        if isinstance(document, Sequence):
            # Array indexes should be turned into integers
            try:
                token = int(token)
            except ValueError:
                pass
        try:
            document = document[token]
        except (TypeError, LookupError):
            raise RefResolutionError("Unresolvable JSON pointer: %r" % fragment)
    return document
def show_ecnf_isoclass(nf, conductor_label, class_label):
    """Render one elliptic curve isogeny class over a number field,
    validating every label and redirecting to the index on bad input."""
    if not FIELD_RE.fullmatch(nf):
        return abort(404)
    conductor_label = unquote(conductor_label)
    # normalize labels for imaginary quadratic fields
    conductor_label = convert_IQF_label(nf, conductor_label)
    try:
        nf_label, nf_pretty = get_nf_info(nf)
    except ValueError:
        # BUGFIX: the original referenced nf_label here, but when
        # get_nf_info raises, nf_label is still unbound (UnboundLocalError);
        # report the raw input instead.
        flash_error("%s is not a valid number field label", nf)
        return redirect(url_for(".index"))
    label = "-".join([nf_label, conductor_label, class_label])
    if not CLASS_LABEL_RE.fullmatch(label):
        flash_error("%s is not a valid elliptic curve isogeny class label", label)
        return redirect(url_for(".index"))
    full_class_label = "-".join([conductor_label, class_label])
    cl = ECNF_isoclass.by_label(label)
    if not isinstance(cl, ECNF_isoclass):
        # lookup failed: by_label returned an error value
        flash_error("There is no elliptic curve isogeny class with label %s in the database", label)
        return redirect(url_for(".index"))
    bread = [("Elliptic curves", url_for(".index"))]
    title = "Elliptic curve isogeny class %s over number field %s" % (
        full_class_label, cl.field_name)
    bread.append((nf_pretty, url_for(".show_ecnf1", nf=nf)))
    bread.append((conductor_label,
                  url_for(".show_ecnf_conductor", nf=nf_label,
                          conductor_label=conductor_label)))
    bread.append((class_label,
                  url_for(".show_ecnf_isoclass", nf=nf_label,
                          conductor_label=quote(conductor_label),
                          class_label=class_label)))
    return render_template("ecnf-isoclass.html",
                           title=title,
                           bread=bread,
                           cl=cl,
                           properties=cl.properties,
                           friends=cl.friends,
                           learnmore=learnmore_list())
def get_base_url_from_root(root):
    """Return the document base url: the (unquoted) response url,
    overridden by the first ``<base href>`` when one exists."""
    # see :func:`.parse` for why we need to unquote
    base_url = unquote(root.base_url) if root.base_url else root.base_url
    # fold at most one <base href> value on top of the response url
    return reduce(urljoin, base_href(root)[:1], base_url)
def conninfo_uri_parse(dsn):
    """Parse a PostgreSQL connection URI into a conninfo dict.

    Handles multi-host netlocs (``host1:p1,host2:p2``), bracketed IPv6
    addresses, percent-encoded values, query parameters, and the
    ``ssl=true`` compatibility alias for ``sslmode=require``.

    :param dsn: e.g. ``postgres://user:pass@host1:5432,host2:5433/db``
    :returns: dict of libpq conninfo keys
    """
    ret = {}
    r = urlparse(dsn)
    if r.username:
        ret['user'] = r.username
    if r.password:
        ret['password'] = r.password
    if r.path[1:]:
        ret['dbname'] = r.path[1:]
    hosts = []
    ports = []
    for netloc in r.netloc.split('@')[-1].split(','):
        host = port = None
        if '[' in netloc and ']' in netloc:
            # Bracketed IPv6 literal, optionally followed by :port.
            # BUGFIX: the original fell through to the generic ':' split
            # afterwards, clobbering the parsed IPv6 host with '['.
            host = netloc.split(']')[0][1:]
            tail = netloc.split(']', 1)[1]
            if tail.startswith(':'):
                port = tail[1:]
        else:
            tmp = netloc.split(':', 1)
            host = tmp[0]
            if len(tmp) == 2:
                port = tmp[1]
        if host is not None:
            hosts.append(host)
        if port is not None:
            ports.append(port)
    if hosts:
        ret['host'] = ','.join(hosts)
    if ports:
        ret['port'] = ','.join(ports)
    # netloc components may be percent-encoded
    ret = {name: unquote(value) for name, value in ret.items()}
    # query parameters override/extend the netloc-derived values
    ret.update({name: value for name, value in parse_qsl(r.query)})
    if ret.get('ssl') == 'true':
        # legacy alias accepted for sslmode=require
        del ret['ssl']
        ret['sslmode'] = 'require'
    return ret
def unescape(self, s):
    """Decode a JavaScript escape()-style string (%xx and %uxxxx forms).

    %uxxxx pairs are emitted low-byte-first (byte-swapped); inputs
    longer than 16 chars are recorded as potential shellcode.
    """
    i = 0
    sc = str()
    if len(s) > 16:
        # long escaped blobs are suspicious: log them for analysis
        log.ThugLogging.shellcodes.add(s)
    # %xx format
    if '%' in s and '%u' not in s:
        return urllib.unquote(s)
    # %uxxxx format
    while i < len(s):
        if s[i] == '"':  # pragma: no cover
            # stray quotes are skipped entirely
            i += 1
            continue
        if s[i] in ('%', ) and (i + 1) < len(s) and s[i + 1] == 'u':
            if (i + 6) <= len(s):
                # full %uXXYY escape: append YY then XX (byte-swapped)
                currchar = int(s[i + 2:i + 4], 16)
                nextchar = int(s[i + 4:i + 6], 16)
                sc += chr(nextchar)
                sc += chr(currchar)
                i += 6
            elif (i + 3) <= len(s):
                # truncated escape: only one byte available
                currchar = int(s[i + 2:i + 4], 16)
                sc += chr(currchar)
                i += 3
        else:
            sc += s[i]
            i += 1
    return sc
def url_distance(preprocessor, url1, url2):
    """Weighted edit distance between two urls: domain differences
    (weight 4) + path-segment distance + query-dict distance + a
    fragment mismatch penalty."""
    parsed1 = urlparse(url1)
    parsed2 = urlparse(url2)

    def process_fn(segment):
        return preprocessor(unquote(segment))

    segments1 = map(process_fn, parsed1.path.strip('/').split('/'))
    segments2 = map(process_fn, parsed2.path.strip('/').split('/'))
    path_distance = levenshtein_array(segments1, segments2)
    query_distance = dict_distance(preprocessor,
                                   parse_qs(parsed1.query, True),
                                   parse_qs(parsed2.query, True))
    # domain differences weigh four times as much as path differences
    domain_distance = 4 * levenshtein_array(
        (parsed1.hostname or '').split('.'),
        (parsed2.hostname or '').split('.'))
    fragment_distance = parsed1.fragment != parsed2.fragment
    return domain_distance + path_distance + query_distance + fragment_distance
def __handle_unescape(self, key):
    """Inline-replace ``key(...)`` unescape-style calls in ``self.js``
    with their percent-decoded text, skipping calls that contain
    nested calls or indexing."""
    start = 0
    while True:
        start_js = self.js
        offset = self.js.find(key, start)
        if offset == -1:
            break
        offset += len(key)
        expr = ''
        extra = ''
        last_c = self.js[offset - 1]
        abort = False
        for i, c in enumerate(self.js[offset:]):
            extra += c
            if c == ')':
                break
            elif (i > 0 and c == '(') or (c == '[' and last_c != '+'):
                # nested call or indexing: too complex, skip this match
                abort = True
                break
            elif c == '%' or c in string.hexdigits:
                # only percent signs and hex digits form the escaped payload
                expr += c
            last_c = c
        if not abort:
            self.js = self.js.replace(key + extra, urllib_parse.unquote(expr))
        if start_js == self.js:
            # no progress this pass: stop to avoid an infinite loop
            break
        else:
            start = offset
def show_ecnf_conductor(nf, conductor_label):
    """List/search elliptic curves over number field ``nf`` with the
    given conductor label."""
    conductor_label = unquote(conductor_label)
    # normalize labels for imaginary quadratic fields
    conductor_label = convert_IQF_label(nf, conductor_label)
    try:
        nf_label, nf_pretty = get_nf_info(nf)
        conductor_norm = conductor_label_norm(conductor_label)
    except ValueError:
        return search_input_error()
    info = to_dict(request.args, search_array=ECNFSearchArray())
    info['title'] = 'Elliptic Curves over %s of Conductor %s' % (
        nf_pretty, conductor_label)
    info['bread'] = [('Elliptic Curves', url_for(".index")),
                     (nf_pretty, url_for(".show_ecnf1", nf=nf)),
                     (conductor_label,
                      url_for(".show_ecnf_conductor", nf=nf,
                              conductor_label=conductor_label))]
    if len(request.args) > 0:
        # if requested field or conductor norm differs from nf or
        # conductor_label, redirect to general search
        if ('field' in request.args and request.args['field'] != nf_label) or \
           ('conductor_norm' in request.args and request.args['conductor_norm'] != conductor_norm):
            return redirect(url_for(".index", **request.args), 307)
        info['title'] += ' Search Results'
        info['bread'].append(('Search Results', ''))
    info['field'] = nf_label
    info['conductor_label'] = conductor_label
    info['conductor_norm'] = conductor_norm
    return elliptic_curve_search(info)
def __extract_video(self, item):
    """Walk Google's deeply nested video metadata structure and collect
    (quality, url) pairs from any embedded ``url=...&itag=...`` strings.

    Returns as soon as the first batch of sources is found.
    """
    sources = []
    for e in item:
        if isinstance(e, dict):
            for key in e:
                for item2 in e[key]:
                    if isinstance(item2, list):
                        for item3 in item2:
                            if isinstance(item3, list):
                                for item4 in item3:
                                    if isinstance(
                                            item4, six.text_type
                                    ) and six.PY2:  # @big change
                                        item4 = item4.encode('utf-8')
                                    if isinstance(
                                            item4, six.string_types
                                    ) and six.PY2:  # @big change
                                        # py2: undo escaped-unicode encoding
                                        item4 = urllib_parse.unquote(
                                            item4).decode('unicode_escape')
                                    for match in re.finditer(
                                            'url=(?P<link>[^&]+).*?&itag=(?P<itag>[^&]+)',
                                            item4):
                                        link = match.group('link')
                                        itag = match.group('itag')
                                        # map numeric itag to a readable label
                                        quality = self.itag_map.get(
                                            itag, 'Unknown Quality [%s]' % itag)
                                        sources.append((quality, link))
                                    if sources:
                                        return sources
    return sources
def unescape(self, s):
    """Decode a JavaScript escape()-style string (%xx and %uxxxx forms).

    %uxxxx pairs are emitted low-byte-first (byte-swapped); inputs
    longer than 16 chars are recorded as potential shellcode.
    """
    i = 0
    sc = list()
    if len(s) > 16:
        # long escaped blobs are suspicious: log them for analysis
        log.ThugLogging.shellcodes.add(s)
    # %xx format
    if '%' in s and '%u' not in s:
        return urllib.unquote(s)
    # %uxxxx format
    while i < len(s):
        if s[i] == '"':
            # stray quotes are skipped entirely
            i += 1
            continue
        if s[i] == '%' and (i + 1) < len(s) and s[i + 1] == 'u':
            if (i + 6) <= len(s):
                # full %uXXYY escape: append YY then XX (byte-swapped)
                currchar = int(s[i + 2: i + 4], 16)
                nextchar = int(s[i + 4: i + 6], 16)
                sc.append(chr(nextchar))
                sc.append(chr(currchar))
                i += 6
            elif (i + 3) <= len(s):
                # truncated escape: only one byte available
                currchar = int(s[i + 2: i + 4], 16)
                sc.append(chr(currchar))
                i += 3
        else:
            sc.append(s[i])
            i += 1
    return ''.join(sc)
def auth_url(self):
    """Build the provider authorization redirect url."""
    state = self.get_or_create_state()
    args = self.auth_params(state)
    args.update(self.get_scope_argument())
    args.update(self.auth_extra_arguments())
    encoded = urlencode(args)
    if not self.REDIRECT_STATE:
        # redirect_uri matching is strictly enforced, so match the
        # provider's value exactly (leave it percent-decoded).
        encoded = unquote(encoded)
    return '{0}?{1}'.format(self.authorization_url(), encoded)
def pathfrag_to_basefile(self, pathfrag):
    """Does the inverse of
    :py:meth:`~ferenda.DocumentStore.basefile_to_pathfrag`, that is,
    converts a fragment of a file path into the corresponding basefile.

    :param pathfrag: The path fragment to decode
    :type pathfrag: str
    :returns: The resulting basefile
    :rtype: str
    """
    if os.sep == "\\":
        # normalize windows separators before decoding
        pathfrag = pathfrag.replace("\\", "/")
    # '/%' marks an escaped segment boundary; collapse it, then unquote
    collapsed = pathfrag.replace('/%', '%')
    return unquote(collapsed)
def extract(self, carrier):
    """Extract a SpanContext from a propagation carrier (header mapping).

    Supports the single trace-id header, prefixed baggage headers, the
    adhoc baggage header, and the debug-id header.

    :param carrier: dict-like collection of header key/values
    :raises InvalidCarrierException: carrier is not a collection
    :returns: a SpanContext or None when nothing usable was found
    """
    if not hasattr(carrier, 'items'):
        raise InvalidCarrierException('carrier not a collection')
    trace_id, span_id, parent_id, flags = None, None, None, None
    baggage = None
    debug_id = None
    for key, value in six.iteritems(carrier):
        uc_key = key.lower()  # header names are case-insensitive
        if uc_key == self.trace_id_header:
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            trace_id, span_id, parent_id, flags = \
                span_context_from_string(value)
        elif uc_key.startswith(self.baggage_prefix):
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            # strip the baggage prefix to recover the attribute name
            attr_key = key[self.prefix_length:]
            if baggage is None:
                baggage = {attr_key.lower(): value}
            else:
                baggage[attr_key.lower()] = value
        elif uc_key == self.debug_id_header:
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            debug_id = value
        elif uc_key == self.baggage_header:
            if self.url_encoding:
                value = urllib_parse.unquote(value)
            # adhoc comma-separated baggage header
            baggage = self._parse_baggage_header(value, baggage)
    if not trace_id or not span_id:
        # reset all IDs
        trace_id, span_id, parent_id, flags = None, None, None, None
    if not trace_id and not debug_id and not baggage:
        return None
    return SpanContext(trace_id=trace_id, span_id=span_id,
                       parent_id=parent_id, flags=flags,
                       baggage=baggage, debug_id=debug_id)
def fragment_dict(self):
    """Client-side data dict represented as JSON in the url fragment.

    Returns {} when the fragment is absent, not JSON-like, not valid
    JSON, or parses to something other than an object.
    """
    raw = self.parsed.fragment
    if not raw:
        return {}
    if raw.startswith('%7B'):
        # percent-encoded '{': decode before parsing
        fragment = unquote(raw)
    elif raw.startswith('{'):
        fragment = raw
    else:
        return {}
    try:
        parsed = json.loads(fragment)
    except ValueError as exc:
        logger.error("%s. Unable to parse %r", exc, fragment)
        return {}
    return parsed if isinstance(parsed, dict) else {}
def _ConvertHeaderToId(header):
    """Convert a Content-ID header value to an id.

    Presumes the Content-ID header conforms to the format that
    _ConvertIdToHeader() returns, i.e. '<prefix+request_id>'.

    Args:
      header: A string indicating the Content-ID header value.

    Returns:
      The extracted id value.

    Raises:
      BatchError if the header is not in the expected format.
    """
    # BUGFIX: the original used `or`, which accepted headers with only
    # one of the angle brackets; the documented format requires both.
    if not (header.startswith('<') and header.endswith('>')):
        raise exceptions.BatchError('Invalid value for Content-ID: %s' % header)
    if '+' not in header:
        raise exceptions.BatchError('Invalid value for Content-ID: %s' % header)
    # split on the LAST '+', since the id itself is percent-encoded
    _, request_id = header[1:-1].rsplit('+', 1)
    return urllib_parse.unquote(request_id)
def _download_url(self, url, user):
    """Download ``url`` into local media storage on behalf of ``user``.

    Returns (via the generator/Deferred) a dict describing the stored
    media: type, length, download name, storage ids and caching
    metadata.

    :raises SynapseError: when the download itself fails
    """
    # TODO: we should probably honour robots.txt... except in practice
    # we're most likely being explicitly triggered by a human rather than a
    # bot, so are we really a robot?

    # unique-per-day id combining the date with a random suffix
    file_id = datetime.date.today().isoformat() + '_' + random_string(16)

    file_info = FileInfo(
        server_name=None,
        file_id=file_id,
        url_cache=True,
    )

    with self.media_storage.store_into_file(file_info) as (f, fname, finish):
        try:
            logger.debug("Trying to get url '%s'" % url)
            length, headers, uri, code = yield self.client.get_file(
                url, output_stream=f, max_size=self.max_spider_size,
            )
        except Exception as e:
            # FIXME: pass through 404s and other error messages nicely
            logger.warn("Error downloading %s: %r", url, e)
            raise SynapseError(
                500, "Failed to download content: %s" % (
                    traceback.format_exception_only(sys.exc_info()[0], e),
                ),
                Codes.UNKNOWN,
            )
        yield finish()

    try:
        if b"Content-Type" in headers:
            media_type = headers[b"Content-Type"][0].decode('ascii')
        else:
            media_type = "application/octet-stream"
        time_now_ms = self.clock.time_msec()

        content_disposition = headers.get(b"Content-Disposition", None)
        if content_disposition:
            _, params = cgi.parse_header(content_disposition[0],)
            download_name = None

            # First check if there is a valid UTF-8 filename
            download_name_utf8 = params.get("filename*", None)
            if download_name_utf8:
                if download_name_utf8.lower().startswith("utf-8''"):
                    # strip the RFC 5987 "utf-8''" charset prefix
                    download_name = download_name_utf8[7:]

            # If there isn't check for an ascii name.
            if not download_name:
                download_name_ascii = params.get("filename", None)
                if download_name_ascii and is_ascii(download_name_ascii):
                    download_name = download_name_ascii

            if download_name:
                download_name = urlparse.unquote(download_name)
                try:
                    download_name = download_name.decode("utf-8")
                except UnicodeDecodeError:
                    # undecodable name: store without one
                    download_name = None
        else:
            download_name = None

        yield self.store.store_local_media(
            media_id=file_id,
            media_type=media_type,
            time_now_ms=self.clock.time_msec(),
            upload_name=download_name,
            media_length=length,
            user_id=user,
            url_cache=url,
        )
    except Exception as e:
        logger.error("Error handling downloaded %s: %r", url, e)
        # TODO: we really ought to delete the downloaded file in this
        # case, since we won't have recorded it in the db, and will
        # therefore not expire it.
        raise

    defer.returnValue({
        "media_type": media_type,
        "media_length": length,
        "download_name": download_name,
        "created_ts": time_now_ms,
        "filesystem_id": file_id,
        "filename": fname,
        "uri": uri,
        "response_code": code,
        # FIXME: we should calculate a proper expiration based on the
        # Cache-Control and Expire headers. But for now, assume 1 hour.
        "expires": 60 * 60 * 1000,
        "etag": headers["ETag"][0] if "ETag" in headers else None,
    })
def unquote(self):
    """Return the object's URL unquote representation."""
    # decode first, then coerce to safe text
    decoded = urllib.unquote(self.context)
    return _safe_as_text(decoded)
def unquote_base_url(quoted):
    """Percent-decode a base url: ascii-encode the text, unquote at the
    byte level, then interpret the result as utf-8."""
    assert isinstance(quoted, unicode)
    raw = quoted.encode('ascii')
    return unquote(raw).decode('utf-8')
def decodeURIComponent(self, s):
    """JavaScript ``decodeURIComponent`` analogue; returns '' for any
    falsy input."""
    if not s:
        return ""
    return urllib.unquote(s)