def process_redirect(self, response):
    """Resolve a bit.ly "warning" interstitial redirect to its target URL.

    A 302 response is expected to point at ``http://bit.ly/a/warning`` with
    exactly one ``url`` and one ``hash`` query parameter; ``hash`` must
    equal ``self.current_shortcode``. Any other status code is delegated to
    ``BaseService.process_redirect``.

    Returns:
        ``(URLStatus.ok, unshortened_url, None)`` for a valid 302 response,
        otherwise whatever ``BaseService.process_redirect`` returns.

    Raises:
        UnexpectedNoResult: the Location header is missing, points
            somewhere else, lacks the expected parameters, or carries a
            different hash.
    """
    # Guard clause: only 302 gets the special handling below.
    if response.status_code != 302:
        return BaseService.process_redirect(self, response)

    if 'location' not in response.headers:
        raise UnexpectedNoResult()

    location = urlparse.urlparse(response.headers['location'])
    if (location.scheme, location.netloc, location.path) != ("http", "bit.ly", "/a/warning"):
        raise UnexpectedNoResult("Unexpected Location header after HTTP status 302")

    running_py2 = sys.version_info[0] == 2

    # On Python 2 the query is parsed as latin-1 bytes and the value is
    # decoded back below; on Python 3 parse_qs works on text directly.
    if running_py2:
        params = urlparse.parse_qs(location.query.encode('latin-1'))
    else:
        params = urlparse.parse_qs(location.query)

    # Both parameters must be present exactly once.
    for required in ("url", "hash"):
        if required not in params or len(params[required]) != 1:
            raise UnexpectedNoResult("Unexpected Location header after HTTP status 302")

    if params["hash"][0] != self.current_shortcode:
        raise UnexpectedNoResult("Hash mismatch for HTTP status 302")

    target = params["url"][0]
    if running_py2:
        target = target.decode('latin-1')

    return (URLStatus.ok, target, None)
def _parse_errorhelp(self, response):
    """Extract the target URL from a tinyurl "errorhelp" page.

    The page body is expected to contain a ``<meta http-equiv="refresh">``
    tag pointing at ``http://tinyurl.com/errorb.php`` with exactly one
    ``url`` and one ``path`` query parameter; ``path`` must equal
    ``"/" + self.current_shortcode``.

    The ``url`` value is decoded as UTF-8 first, then cp1252, then latin-1.
    This fallback is applied on both Python 2 and Python 3, so undecodable
    bytes are never silently replaced with U+FFFD.

    Returns:
        ``(URLStatus.ok, result_url, encoding)``. ``encoding`` is
        ``response.encoding`` when the value decoded as UTF-8, otherwise
        the name of the fallback codec that was used.

    Raises:
        UnexpectedNoResult: the meta refresh is missing, points somewhere
            unexpected, lacks the required parameters, or names a
            different shortcode.
    """
    match = re.search('<meta http-equiv="refresh" content="0;url=(.*?)">', response.text)
    if not match:
        raise UnexpectedNoResult(
            "No redirect on \"errorhelp\" page on HTTP status 200 for {0}"
            .format(response.url))

    url = urlparse.urlparse(match.group(1))
    if url.scheme != "http" or url.netloc != "tinyurl.com" or url.path != "/errorb.php":
        raise UnexpectedNoResult(
            "Unexpected redirect on \"errorhelp\" page on HTTP status 200 for {0}"
            .format(response.url))

    is_py2 = sys.version_info[0] == 2

    # Lenient parse, used only to validate the structure of the redirect.
    if is_py2:
        query = urlparse.parse_qs(url.query.encode('utf-8'))
    else:
        query = urlparse.parse_qs(url.query)

    if not ("url" in query and len(query["url"]) == 1) or not ("path" in query and len(query["path"]) == 1):
        raise UnexpectedNoResult(
            "Unexpected redirect on \"errorhelp\" page on HTTP status 200 for {0}"
            .format(response.url))

    if query["path"][0] != ("/" + self.current_shortcode):
        raise UnexpectedNoResult(
            "Code mismatch on \"errorhelp\" on HTTP status 200")

    encoding = response.encoding

    if is_py2:
        raw_url = query["url"][0]
        try:
            result_url = raw_url.decode('utf-8')
        except UnicodeError:
            try:
                result_url = raw_url.decode('cp1252')
                encoding = 'cp1252'
            except UnicodeError:
                result_url = raw_url.decode('latin-1')
                encoding = 'latin-1'
    else:
        # Python 3's parse_qs defaults to errors='replace', which would turn
        # non-UTF-8 percent-escapes into U+FFFD. Re-parse strictly with each
        # candidate codec instead, mirroring the Python 2 fallback chain.
        # latin-1 maps every byte, so the loop always ends via ``break``.
        for candidate in ('utf-8', 'cp1252', 'latin-1'):
            try:
                strict_query = urlparse.parse_qs(
                    url.query, encoding=candidate, errors='strict')
            except UnicodeError:
                continue
            result_url = strict_query["url"][0]
            if candidate != 'utf-8':
                encoding = candidate
            break

    return (URLStatus.ok, result_url, encoding)
def _scrub_url(self, code, url):
    """Unwrap a ``redirect.tinyurl.com/api/click`` tracking URL.

    When ``url`` is a tinyurl click-tracking link carrying a non-empty
    ``out`` query parameter, the first ``out`` value is returned as the
    real destination. Any other URL is returned unchanged, including a
    click link with no ``out`` parameter.

    Args:
        code: the shortcode being processed (not used by this method).
        url: the URL to scrub.

    Returns:
        ``(URLStatus.ok, url_or_scrubbed_url, 'latin-1')``.
    """
    parsed_url = urlparse.urlparse(url)

    if parsed_url.hostname == "redirect.tinyurl.com" and parsed_url.path == "/api/click":
        is_py2 = sys.version_info[0] == 2

        if is_py2:
            query = urlparse.parse_qs(parsed_url.query.encode('latin-1'))
        else:
            query = urlparse.parse_qs(parsed_url.query, encoding='latin-1')

        # parse_qs omits keys that are absent or blank, so use .get() rather
        # than indexing: a click URL without "out" must fall through to the
        # unchanged-URL return below instead of raising KeyError.
        out_values = query.get("out")
        if out_values:
            scrubbed_url = out_values[0]
            if is_py2:
                scrubbed_url = scrubbed_url.decode('latin-1')
            return (URLStatus.ok, scrubbed_url, 'latin-1')

    return (URLStatus.ok, url, 'latin-1')
def _parse_errorhelp(self, response):
    """Extract the target URL from a tinyurl "errorhelp" page.

    The page body is expected to contain a ``<meta http-equiv="refresh">``
    tag pointing at ``http://tinyurl.com/errorb.php`` with exactly one
    ``url`` and one ``path`` query parameter; ``path`` must equal
    ``"/" + self.current_shortcode``.

    The ``url`` value is decoded as UTF-8 first, then cp1252, then latin-1.
    This fallback is applied on both Python 2 and Python 3, so undecodable
    bytes are never silently replaced with U+FFFD.

    Returns:
        ``(URLStatus.ok, result_url, encoding)``. ``encoding`` is
        ``response.encoding`` when the value decoded as UTF-8, otherwise
        the name of the fallback codec that was used.

    Raises:
        UnexpectedNoResult: the meta refresh is missing, points somewhere
            unexpected, lacks the required parameters, or names a
            different shortcode.
    """
    match = re.search('<meta http-equiv="refresh" content="0;url=(.*?)">', response.text)
    if not match:
        raise UnexpectedNoResult("No redirect on \"errorhelp\" page on HTTP status 200 for {0}".format(response.url))

    url = urlparse.urlparse(match.group(1))
    if url.scheme != "http" or url.netloc != "tinyurl.com" or url.path != "/errorb.php":
        raise UnexpectedNoResult("Unexpected redirect on \"errorhelp\" page on HTTP status 200 for {0}".format(response.url))

    is_py2 = sys.version_info[0] == 2

    # Lenient parse, used only to validate the structure of the redirect.
    if is_py2:
        query = urlparse.parse_qs(url.query.encode('utf-8'))
    else:
        query = urlparse.parse_qs(url.query)

    if not ("url" in query and len(query["url"]) == 1) or not ("path" in query and len(query["path"]) == 1):
        raise UnexpectedNoResult("Unexpected redirect on \"errorhelp\" page on HTTP status 200 for {0}".format(response.url))

    if query["path"][0] != ("/" + self.current_shortcode):
        raise UnexpectedNoResult("Code mismatch on \"errorhelp\" on HTTP status 200")

    encoding = response.encoding

    if is_py2:
        raw_url = query["url"][0]
        try:
            result_url = raw_url.decode('utf-8')
        except UnicodeError:
            try:
                result_url = raw_url.decode('cp1252')
                encoding = 'cp1252'
            except UnicodeError:
                result_url = raw_url.decode('latin-1')
                encoding = 'latin-1'
    else:
        # Python 3's parse_qs defaults to errors='replace', which would turn
        # non-UTF-8 percent-escapes into U+FFFD. Re-parse strictly with each
        # candidate codec instead, mirroring the Python 2 fallback chain.
        # latin-1 maps every byte, so the loop always ends via ``break``.
        for candidate in ('utf-8', 'cp1252', 'latin-1'):
            try:
                strict_query = urlparse.parse_qs(
                    url.query, encoding=candidate, errors='strict')
            except UnicodeError:
                continue
            result_url = strict_query["url"][0]
            if candidate != 'utf-8':
                encoding = candidate
            break

    return (URLStatus.ok, result_url, encoding)