def __init__(self, *args, **kwargs):
    """Initialise the base service, then install a custom User-Agent.

    All positional and keyword arguments are forwarded unchanged to
    ``BaseService.__init__``.
    """
    BaseService.__init__(self, *args, **kwargs)

    # Browser-style identification string; VERSION is substituted into
    # the trailing "URLTeam" token.
    agent_template = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/39.0.2171.95 Safari/537.36 '
        'Nintendu/64 (URLTeam {0})'
    )
    self.user_agent = agent_template.format(VERSION)
def process_redirect(self, response):
    """Resolve a redirect, retrying once and filtering the 404 page.

    When the base class yields a link that both starts and ends with
    ``/``, the short URL is requested again (HEAD) with a trailing slash
    appended and the redirect is processed a second time.

    Returns:
        ``(URLStatus.not_found, None, None)`` when the final link is the
        myspace.com 404 page, otherwise the ``(status, link, encoding)``
        triple produced by ``BaseService.process_redirect``.
    """
    not_found_pages = ('https://myspace.com/404', 'http://myspace.com/404')

    status, link, encoding = BaseService.process_redirect(self, response)

    if link.startswith('/') and link.endswith('/'):
        # Path-only redirect: ask again with a trailing slash on the
        # short URL and use whatever that second response redirects to.
        base_url = self.params['url_template'].format(
            shortcode=self.current_shortcode
        )
        response = self.fetch_url(base_url + '/', 'head')
        status, link, encoding = BaseService.process_redirect(self, response)

    if link in not_found_pages:
        return URLStatus.not_found, None, None
    return status, link, encoding
def process_unknown_code(self, response):
    """Scrape the target URL from the page served with HTTP 200.

    Any status other than 200 is delegated to
    ``BaseService.process_unknown_code``. For a 200, the short URL is
    fetched again with GET and the target is taken from the ``href`` of
    the ``btn ignore`` anchor in the HTML-unescaped body.

    Returns:
        ``(URLStatus.ok, target, response.encoding)``.

    Raises:
        UnexpectedNoResult: if the second request is not a 200, or the
            anchor cannot be found in the page.
    """
    if response.status_code != 200:
        return BaseService.process_unknown_code(self, response)

    shortcode = self.current_shortcode
    page_url = self.params['url_template'].format(shortcode=shortcode)
    response = self.fetch_url(page_url, 'get')

    status_code = response.status_code
    if status_code != 200:
        raise UnexpectedNoResult(
            "Didn't get OK on second try. Got {0} for {1}"
            .format(status_code, self.current_shortcode)
        )

    # Copied from tinyback; this path is probably no longer reached.
    anchor = re.search(
        '<a class="btn ignore" href="(.*?)" title=',
        html_unescape(response.text),
    )
    if anchor is None:
        raise UnexpectedNoResult(
            "Didn't get match on second try for {0}"
            .format(self.current_shortcode)
        )

    return (URLStatus.ok, anchor.group(1), response.encoding)
def process_unknown_code(self, response):
    """Extract the long URL from the preview page (HTTP 200 or 500).

    Statuses other than 200/500 are delegated to
    ``BaseService.process_unknown_code``. Otherwise the short URL is
    fetched again with GET and the target is scraped from the body.

    Returns:
        ``(URLStatus.ok, url, response.encoding)`` where ``url`` is
        HTML-unescaped and stripped of CR/LF characters.

    Raises:
        UnhandledStatusCode: if the second response is neither 200 nor 500.
        UnexpectedNoResult: if no target URL can be found in the page.
    """
    expected_statuses = (200, 500)

    first_status_code = response.status_code
    if first_status_code not in expected_statuses:
        return BaseService.process_unknown_code(self, response)

    preview_url = self.params["url_template"].format(
        shortcode=self.current_shortcode
    )
    response = self.fetch_url(preview_url, "get")

    second_status_code = response.status_code
    if second_status_code not in expected_statuses:
        raise UnhandledStatusCode(
            "HTTP status changed from %s to %i on second request for %s"
            % (first_status_code, second_status_code, self.current_shortcode)
        )

    pattern = (
        "<p>You clicked on a snipped URL, which will take you to the "
        "following looong URL: </p> <div class=\"quote\">"
        "<span class=\"quotet\"></span><br/>(.*?)</div> <br />"
    )
    body = response.text

    # Three attempts, each tried only if the previous one failed:
    # the raw body, the body with "<br />\n" removed, and finally the
    # raw body again with "." allowed to span newlines.
    match = (
        re.search(pattern, body)
        or re.search(pattern, body.replace("<br />\n", ""))
        or re.search(pattern, body, re.DOTALL)
    )
    if not match:
        raise UnexpectedNoResult(
            "Could not find target URL on preview page for {0}".format(
                self.current_shortcode
            )
        )

    target = html_unescape(match.group(1))
    target = target.replace("\n", "").replace("\r", "")
    return URLStatus.ok, target, response.encoding
def process_redirect(self, response):
    """Treat a redirect to the yatuc.com front page as "not found".

    Returns:
        ``(URLStatus.not_found, None, None)`` when the base class resolves
        the redirect to ``http://yatuc.com``; otherwise the base class
        result unchanged.
    """
    result = BaseService.process_redirect(self, response)
    status, url, encoding = result

    if url != 'http://yatuc.com':
        return status, url, encoding
    return URLStatus.not_found, None, None
def process_redirect(self, response):
    """Unwrap the bit.ly warning-page redirect sent with HTTP 302.

    Any status other than 302 is handled by
    ``BaseService.process_redirect``. A 302 must point at
    ``http://bit.ly/a/warning`` and carry exactly one ``url`` and one
    ``hash`` query parameter, with ``hash`` equal to the current
    shortcode; the ``url`` parameter is then the unshortened target.

    Returns:
        ``(URLStatus.ok, unshortened_url, None)`` for a valid 302.

    Raises:
        UnexpectedNoResult: if the Location header is missing, points
            somewhere unexpected, lacks the required parameters, or the
            hash does not match the current shortcode.
    """
    if response.status_code != 302:
        return BaseService.process_redirect(self, response)

    if 'location' not in response.headers:
        raise UnexpectedNoResult()

    target = urlparse.urlparse(response.headers['location'])
    points_at_warning = (
        target.scheme == "http"
        and target.netloc == "bit.ly"
        and target.path == "/a/warning"
    )
    if not points_at_warning:
        raise UnexpectedNoResult(
            "Unexpected Location header after HTTP status 302")

    # On Python 2 the query is round-tripped through latin-1 bytes:
    # encoded before parsing and decoded again after extraction.
    running_python2 = sys.version_info[0] == 2

    raw_query = target.query
    if running_python2:
        raw_query = raw_query.encode('latin-1')
    query = urlparse.parse_qs(raw_query)

    # Both parameters must be present exactly once.
    for required in ("url", "hash"):
        if required not in query or len(query[required]) != 1:
            raise UnexpectedNoResult(
                "Unexpected Location header after HTTP status 302")

    if query["hash"][0] != self.current_shortcode:
        raise UnexpectedNoResult("Hash mismatch for HTTP status 302")

    unshortened_url = query["url"][0]
    if running_python2:
        unshortened_url = unshortened_url.decode('latin-1')

    return (URLStatus.ok, unshortened_url, None)
def process_redirect_body(self, response):
    """Map the site's error page to "not found".

    Delegates to ``BaseService.process_redirect_body``. If that raises
    ``UnexpectedNoResult`` and the body contains the site's error text
    (a Hungarian message, decoded from UTF-8 bytes), the shortcode is
    reported as not found; otherwise the exception is re-raised.
    """
    try:
        return BaseService.process_redirect_body(self, response)
    except UnexpectedNoResult:
        error_text = (
            b'Sajn\xc3\xa1lom, de a be\xc3\xadrt URL hib\xc3\xa1s!'
        ).decode('utf8')
        if error_text not in response.text:
            raise
        return (URLStatus.not_found, None, None)
def process_redirect(self, response):
    """Treat a redirect back to totally.awe.sm as "not found".

    Returns:
        ``(URLStatus.not_found, None, None)`` when the resolved URL starts
        with ``http://totally.awe.sm/`` and ends with the current
        shortcode; otherwise the base class result unchanged.
    """
    status, url, encoding = BaseService.process_redirect(self, response)

    bounced_back = (
        url.startswith('http://totally.awe.sm/')
        and url.endswith(self.current_shortcode)
    )
    if not bounced_back:
        return status, url, encoding
    return URLStatus.not_found, None, None
def process_redirect(self, response):
    """Treat a successful redirect to the GoDaddy page as "not found".

    Returns:
        ``(URLStatus.not_found, None, None)`` when the base class reports
        ``URLStatus.ok`` with the GoDaddy default page as target;
        otherwise the base class result unchanged.
    """
    parked_page = 'http://www.godaddy.com/default.aspx?isc=xcowebgd'

    status, url, encoding = BaseService.process_redirect(self, response)

    if status == URLStatus.ok and url == parked_page:
        return URLStatus.not_found, None, None
    return status, url, encoding
def process_redirect_body(self, response):
    """Report "not found" when the body carries the site's error text.

    The base implementation is tried first. Only when it raises
    ``UnexpectedNoResult`` is the body inspected for the error message
    (stored as UTF-8 bytes and decoded before comparison). If the
    message is absent the original exception propagates.
    """
    try:
        result = BaseService.process_redirect_body(self, response)
    except UnexpectedNoResult:
        needle = b'Sajn\xc3\xa1lom, de a be\xc3\xadrt URL hib\xc3\xa1s!'
        if needle.decode('utf8') in response.text:
            return (URLStatus.not_found, None, None)
        raise
    return result
def process_redirect(self, response):
    """Resolve a redirect, mapping two special cases to fixed statuses.

    * The base class raises ``UnexpectedNoResult`` and the response has
      no ``Location`` header: reported as ``URLStatus.not_found``.
    * The link is the ``getprivate`` page for the current shortcode:
      reported as ``URLStatus.unavailable``.

    Any other ``UnexpectedNoResult`` is re-raised; any other link is
    returned as produced by the base class.
    """
    try:
        resolved = BaseService.process_redirect(self, response)
    except UnexpectedNoResult:
        if "Location" in response.headers:
            raise
        return URLStatus.not_found, None, None

    url_status, link, encoding = resolved
    private_link = "/site/getprivate?snip=" + self.current_shortcode
    if link != private_link:
        return url_status, link, encoding
    return URLStatus.unavailable, None, None
def process_redirect(self, response):
    """Resolve a redirect with handling for missing and private snips.

    Returns:
        ``(URLStatus.not_found, None, None)`` if the base class raises
        ``UnexpectedNoResult`` and no ``Location`` header is present;
        ``(URLStatus.unavailable, None, None)`` if the link is the
        ``getprivate`` page for the current shortcode; otherwise the
        base class result.

    Raises:
        UnexpectedNoResult: re-raised from the base class when a
            ``Location`` header is present.
    """
    try:
        outcome = BaseService.process_redirect(self, response)
    except UnexpectedNoResult:
        has_location = 'Location' in response.headers
        if not has_location:
            return URLStatus.not_found, None, None
        raise

    url_status, link, encoding = outcome
    if link == "/site/getprivate?snip=" + self.current_shortcode:
        return URLStatus.unavailable, None, None
    return url_status, link, encoding
def process_redirect(self, response):
    """Dispatch a response to the 200, preview, or plain redirect path.

    * HTTP 200 is handed to ``self._fetch_200``.
    * A 301 with a ``Location`` header and an ``X-tiny`` header whose
      first three characters are ``aff`` is handed to ``self._preview``.
    * Everything else goes to ``BaseService.process_redirect``; if that
      raises ``UnexpectedNoResult`` the result is
      ``(URLStatus.unavailable, None, None)``.
    """
    if response.status_code == 200:
        return self._fetch_200(response)

    headers = response.headers
    if response.status_code == 301 and 'Location' in headers:
        tiny = headers.get("X-tiny")
        if tiny and tiny[:3] == "aff":
            return self._preview(self.current_shortcode, headers['Location'])

    try:
        return BaseService.process_redirect(self, response)
    except UnexpectedNoResult:
        return (URLStatus.unavailable, None, None)
def process_redirect(self, response):
    """Handle a response, with special cases for 200 and "aff" 301s.

    Returns:
        The result of ``self._fetch_200`` for HTTP 200; the result of
        ``self._preview`` for a 301 that has a ``Location`` header and an
        ``X-tiny`` header beginning with ``aff``; otherwise the base
        class result, or ``(URLStatus.unavailable, None, None)`` when the
        base class raises ``UnexpectedNoResult``.
    """
    status_code = response.status_code
    if status_code == 200:
        return self._fetch_200(response)

    is_located_301 = 'Location' in response.headers and status_code == 301
    if is_located_301:
        tiny = response.headers.get("X-tiny")
        flagged = tiny and tiny[:3] == "aff"
        if flagged:
            return self._preview(
                self.current_shortcode,
                response.headers['Location'],
            )

    try:
        resolved = BaseService.process_redirect(self, response)
    except UnexpectedNoResult:
        return (URLStatus.unavailable, None, None)
    return resolved
def process_redirect_body(self, response):
    """Recover the target from an error page that echoes the header.

    The base implementation is tried first. If it raises
    ``UnexpectedNoResult`` and the body contains ``cakeErr1-context``,
    the target is read from the ``"Location: ..."`` text preceding
    ``</pre>``. In every other case the exception is re-raised.

    Returns:
        ``(URLStatus.ok, link, response.encoding)`` with CR/LF in the
        link percent-encoded.
    """
    try:
        return BaseService.process_redirect_body(self, response)
    except UnexpectedNoResult:
        page = response.text
        if 'cakeErr1-context' not in page:
            raise

        found = re.search(r'"Location: (.*)"</pre>', page, re.DOTALL)
        if not found:
            raise

        # urllib's quoting would escape more than wanted here, so only
        # the line-break characters are percent-encoded by hand.
        link = found.group(1).replace('\n', '%0A').replace('\r', '%0D')
        return (URLStatus.ok, link, response.encoding)
def process_redirect_body(self, response):
    """Fall back to scraping the Location echoed in an error page.

    Returns:
        The base class result when it succeeds; otherwise
        ``(URLStatus.ok, link, response.encoding)`` scraped from a body
        containing ``cakeErr1-context``.

    Raises:
        UnexpectedNoResult: re-raised when the body lacks the
            ``cakeErr1-context`` marker or the Location text.
    """
    try:
        return BaseService.process_redirect_body(self, response)
    except UnexpectedNoResult:
        is_error_page = 'cakeErr1-context' in response.text
        if not is_error_page:
            raise

        location = re.search(
            r'"Location: (.*)"</pre>', response.text, re.DOTALL
        )
        if location is None:
            raise

        # Escape only the bare minimum (line breaks); the standard
        # library's URL quoting escapes too much for this purpose.
        escaped = location.group(1)
        for raw, encoded in (('\n', '%0A'), ('\r', '%0D')):
            escaped = escaped.replace(raw, encoded)

        return (URLStatus.ok, escaped, response.encoding)
def process_unavailable(self, response):
    """Scrape the former target from an HTTP 410 blocked page.

    Statuses other than 410 are delegated to
    ``BaseService.process_unavailable``.

    Returns:
        ``(URLStatus.ok, url, response.encoding)`` when the page states
        where the short URL was forwarding to, or
        ``(URLStatus.unavailable, None, None)`` when it instead carries
        one of the two "removed for spamming" notices.

    Raises:
        UnexpectedNoResult: if the page has neither the forwarding text
            nor a spam-removal notice.
    """
    if response.status_code != 410:
        return BaseService.process_unavailable(self, response)

    page = response.text
    match = re.search(
        r'was forwarding to: <BR> <font color=red>(.*)</font>', page)

    if match:
        return URLStatus.ok, html_unescape(match.group(1)), response.encoding

    # No forwarding target: accept either wording of the spam notice.
    if re.search(r'This shortURL address was REMOVED for SPAMMING', page):
        return URLStatus.unavailable, None, None
    if 'REMOVED FOR SPAMMING' in page:
        return URLStatus.unavailable, None, None

    raise UnexpectedNoResult(
        "Could not find target URL on blocked page for {0}"
        .format(self.current_shortcode))
def process_unknown_code(self, response):
    """Scrape the long URL from the preview page (HTTP 200 or 500).

    If the incoming status is not 200 or 500 the base class handles it.
    Otherwise the short URL is requested again with GET; that second
    response must also be 200 or 500, and its body is searched for the
    long URL.

    Returns:
        ``(URLStatus.ok, url, response.encoding)``; ``url`` is
        HTML-unescaped with CR and LF removed.

    Raises:
        UnhandledStatusCode: second response status is not 200 or 500.
        UnexpectedNoResult: the long URL is not found in the body.
    """
    first_status_code = response.status_code
    if not (first_status_code == 200 or first_status_code == 500):
        return BaseService.process_unknown_code(self, response)

    template = self.params['url_template']
    response = self.fetch_url(
        template.format(shortcode=self.current_shortcode), 'get')

    second_status_code = response.status_code
    if not (second_status_code == 200 or second_status_code == 500):
        details = (
            first_status_code, second_status_code, self.current_shortcode)
        raise UnhandledStatusCode(
            "HTTP status changed from %s to %i on second request for %s"
            % details)

    pattern = '<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class="quote"><span class="quotet"></span><br/>(.*?)</div> <br />'

    # Search strategies in order; each is evaluated lazily and only if
    # the ones before it found nothing.
    attempts = (
        lambda: re.search(pattern, response.text),
        lambda: re.search(pattern, response.text.replace("<br />\n", "")),
        lambda: re.search(pattern, response.text, re.DOTALL),
    )
    match = None
    for attempt in attempts:
        match = attempt()
        if match:
            break

    if not match:
        raise UnexpectedNoResult(
            "Could not find target URL on preview page for {0}".format(
                self.current_shortcode))

    long_url = html_unescape(match.group(1))
    long_url = long_url.replace('\n', '').replace('\r', '')
    return URLStatus.ok, long_url, response.encoding
def scrape_one(self, sequence_number):
    """Clear the phishing-page flag, then scrape via the base class.

    Returns:
        Whatever ``BaseService.scrape_one`` returns for
        ``sequence_number``.
    """
    # Reset the per-item flag before each scrape.
    self._processing_phishing_page = False
    result = BaseService.scrape_one(self, sequence_number)
    return result
def __init__(self, *args, **kwargs):
    """Initialise the base service and the phishing-page flag.

    All arguments are forwarded unchanged to ``BaseService.__init__``;
    ``_processing_phishing_page`` then starts out as ``False``.
    """
    BaseService.__init__(self, *args, **kwargs)

    self._processing_phishing_page = False
def fetch_url(self, url):
    """Fetch ``url`` with ``?passthru=1`` appended.

    Returns:
        Whatever ``BaseService.fetch_url`` returns for the suffixed URL.
    """
    passthru_url = url + '?passthru=1'
    return BaseService.fetch_url(self, passthru_url)
def __init__(self, *args, **kwargs):
    """Set up the base service and override its User-Agent string.

    Arguments are passed straight through to ``BaseService.__init__``.
    """
    BaseService.__init__(self, *args, **kwargs)

    # Adjacent literals are concatenated into one template; the only
    # placeholder is filled with VERSION.
    browser_agent = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/39.0.2171.95 Safari/537.36 '
        'Nintendu/64 (URLTeam {0})'
    )
    self.user_agent = browser_agent.format(VERSION)
def process_redirect_body(self, response):
    """Report a 301 that lacks a ``Location`` header as "not found".

    Returns:
        ``(URLStatus.not_found, None, None)`` for a 301 without a
        ``Location`` header; otherwise the result of
        ``BaseService.process_redirect_body``.
    """
    is_301 = response.status_code == 301
    if is_301 and 'Location' not in response.headers:
        return (URLStatus.not_found, None, None)

    return BaseService.process_redirect_body(self, response)
def __init__(self, *args, **kwargs):
    """Consume ``shortcode_width`` and initialise the base service.

    Args:
        shortcode_width: optional keyword argument, default ``4``. It is
            removed from ``kwargs`` so it is not passed on to
            ``BaseService.__init__``.
    """
    width = kwargs.pop('shortcode_width', 4)
    self.shortcode_width = width

    BaseService.__init__(self, *args, **kwargs)
def process_redirect(self, response):
    """Resolve a redirect, treating "no result" as "not found".

    Returns:
        The result of ``BaseService.process_redirect``, or
        ``(URLStatus.not_found, None, None)`` if it raises
        ``UnexpectedNoResult``.
    """
    try:
        resolved = BaseService.process_redirect(self, response)
    except UnexpectedNoResult:
        return (URLStatus.not_found, None, None)
    return resolved