def _page_has_form(link, timeout):
    """Return True if the body fetched from *link* contains ``<form``.

    The fetch is best-effort: any I/O failure (including HTTP error
    statuses, which ``urllib2`` raises as ``HTTPError``) or malformed HTTP
    response makes the function return False instead of raising.
    """
    # Imported locally so the block does not depend on a file-level import.
    import httplib

    try:
        resp = urllib2.urlopen(link, timeout=timeout)
        try:
            return "<form" in resp.read()
        finally:
            resp.close()
    except (IOError, httplib.HTTPException):
        # URLError/HTTPError and socket errors are IOError subclasses;
        # httplib errors (e.g. BadStatusLine) are not, so catch them too.
        return False


def magic_probe(start_url, timeout=None, config=None):
    """Pick probe targets from the links on *start_url* and probe the site.

    The page at *start_url* is fetched and every match of ``_link_re`` in
    its body is resolved against the URL reported by the response
    (``geturl()``).  Only links on the same host as that URL are
    considered.  From them the function selects:

    * ``url_with_slash`` -- the first link ending in ``/`` that is not the
      page itself;
    * ``url_without_slash`` -- the first link not ending in ``/``;
    * ``form_url`` -- the first link whose body contains ``<form``.

    Any of the three stays ``None`` when no suitable link is found.

    :param start_url: URL of the page to start from.
    :param timeout: timeout passed to every ``urllib2.urlopen`` call made
        here and forwarded to ``probe_website``.
    :param config: forwarded unchanged to ``probe_website``.
    :return: tuple ``(url, result)`` where ``url`` is the final URL of the
        start page and ``result`` is whatever ``probe_website`` returns.
    :raises: whatever ``urllib2.urlopen`` raises for *start_url*; only the
        per-link form checks are best-effort.
    """
    form_url = url_with_slash = url_without_slash = None

    resp = urllib2.urlopen(start_url, timeout=timeout)
    try:
        url = resp.geturl()
        body = resp.read()
    finally:
        resp.close()

    # Loop-invariant: the host every candidate link must match.
    site = urlparse.urlsplit(url).netloc

    for link in _link_re.findall(body):
        # Slice instead of indexing so an empty match cannot raise.
        if link[:1] in ('"', "'"):
            link = link[1:-1]
        link = urlparse.urljoin(url, link)
        if urlparse.urlsplit(link).netloc != site:
            continue

        if url_with_slash is None and link.endswith("/") and link != url:
            url_with_slash = link
        elif url_without_slash is None and not link.endswith("/"):
            url_without_slash = link

        # Honour the caller's timeout for the form check as well.
        if form_url is None and _page_has_form(link, timeout):
            form_url = link

        # Nothing below can change once all three targets are known.
        if (form_url is not None and url_with_slash is not None
                and url_without_slash is not None):
            break

    return url, probe_website(url, form_url, url_with_slash,
                              url_without_slash, timeout, config)
def _page_has_form(link, timeout):
    """Return True if the body fetched from *link* contains ``<form``.

    The fetch is best-effort: any I/O failure (including HTTP error
    statuses, which ``urllib2`` raises as ``HTTPError``) or malformed HTTP
    response makes the function return False instead of raising.
    """
    # Imported locally so the block does not depend on a file-level import.
    import httplib

    try:
        resp = urllib2.urlopen(link, timeout=timeout)
        try:
            return '<form' in resp.read()
        finally:
            resp.close()
    except (IOError, httplib.HTTPException):
        # URLError/HTTPError and socket errors are IOError subclasses;
        # httplib errors (e.g. BadStatusLine) are not, so catch them too.
        return False


def magic_probe(start_url, timeout=None, config=None):
    """Pick probe targets from the links on *start_url* and probe the site.

    The page at *start_url* is fetched and every match of ``_link_re`` in
    its body is resolved against the URL reported by the response
    (``geturl()``).  Only links on the same host as that URL are
    considered.  From them the function selects:

    * ``url_with_slash`` -- the first link ending in ``/`` that is not the
      page itself;
    * ``url_without_slash`` -- the first link not ending in ``/``;
    * ``form_url`` -- the first link whose body contains ``<form``.

    Any of the three stays ``None`` when no suitable link is found.

    :param start_url: URL of the page to start from.
    :param timeout: timeout passed to every ``urllib2.urlopen`` call made
        here and forwarded to ``probe_website``.
    :param config: forwarded unchanged to ``probe_website``.
    :return: tuple ``(url, result)`` where ``url`` is the final URL of the
        start page and ``result`` is whatever ``probe_website`` returns.
    :raises: whatever ``urllib2.urlopen`` raises for *start_url*; only the
        per-link form checks are best-effort.
    """
    form_url = url_with_slash = url_without_slash = None

    resp = urllib2.urlopen(start_url, timeout=timeout)
    try:
        url = resp.geturl()
        body = resp.read()
    finally:
        resp.close()

    # Loop-invariant: the host every candidate link must match.
    site = urlparse.urlsplit(url).netloc

    for link in _link_re.findall(body):
        # Slice instead of indexing so an empty match cannot raise.
        if link[:1] in ('"', "'"):
            link = link[1:-1]
        link = urlparse.urljoin(url, link)
        if urlparse.urlsplit(link).netloc != site:
            continue

        if url_with_slash is None and link.endswith('/') and link != url:
            url_with_slash = link
        elif url_without_slash is None and not link.endswith('/'):
            url_without_slash = link

        # Honour the caller's timeout for the form check as well.
        if form_url is None and _page_has_form(link, timeout):
            form_url = link

        # Nothing below can change once all three targets are known.
        if (form_url is not None and url_with_slash is not None
                and url_without_slash is not None):
            break

    return url, probe_website(url, form_url, url_with_slash,
                              url_without_slash, timeout, config)
def runtest(caption, url, *args, **kwargs):
    """Probe *url* and print the outcome under *caption*.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``probe_website``; its return value is handed to ``print_results``
    together with *caption* and *url*.
    """
    outcome = probe_website(url, *args, **kwargs)
    print_results(caption, url, outcome)