import requests
import BeautifulSoup
import HTMLParser
import lswww

# url, proxy, jc (cookie store) and the gettext _() helper are assumed to be
# defined by the enclosing module.
current = url.split("#")[0]
current = current.split("?")[0]
currentdir = "/".join(current.split("/")[:-1]) + "/"
proto = url.split("://")[0]

txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

session = requests.Session()
session.proxies = proxy
r = session.get(url, headers=txheaders)
htmlSource = r.text

bs = BeautifulSoup.BeautifulSoup(htmlSource)
p = lswww.linkParser(url)
try:
    p.feed(htmlSource)
except HTMLParser.HTMLParseError, err:
    # First fallback: retry on a prettified copy of the page; if that still
    # fails, hand the page to the alternative linkParser2.
    htmlSource = bs.prettify()
    try:
        p.reset()
        p.feed(htmlSource)
    except HTMLParser.HTMLParseError, err:
        p = lswww.linkParser2(url)
        p.feed(htmlSource)

jc.addcookies(r.cookies)

if len(p.forms) == 0:
    print(_("No forms found in this page !"))
import sys
import socket
import urllib2
import HTMLParser
import lswww

# uTidylib is optional; remember whether it is available.
try:
    import tidy
    tidyhere = 1
except ImportError:
    tidyhere = 0

proto = url.split("://")[0]
agent = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

# Send the spoofed User-Agent along with the request.
req = urllib2.Request(url, headers=agent)
socket.setdefaulttimeout(6)
try:
    fd = urllib2.urlopen(req)
except IOError:
    print "Error getting url"
    sys.exit(1)
try:
    htmlSource = fd.read()
except socket.timeout:
    print "Error fetching page"
    sys.exit(1)

p = lswww.linkParser()
try:
    p.feed(htmlSource)
except HTMLParser.HTMLParseError, err:
    if tidyhere == 1:
        # Clean the markup with tidy, then give the parser a second try.
        options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0)
        htmlSource = str(tidy.parseString(htmlSource, **options))
        try:
            p.reset()
            p.feed(htmlSource)
        except HTMLParser.HTMLParseError, err:
            pass

if len(p.forms) == 0:
    print "No forms found in this page !"
    sys.exit(1)
import sys
import socket
import urllib2
import HTMLParser
import BeautifulSoup
import lswww

req = urllib2.Request(url)
socket.setdefaulttimeout(6)
try:
    fd = urllib2.urlopen(req)
except IOError:
    print _("Error getting url")
    sys.exit(1)
try:
    htmlSource = fd.read()
except socket.timeout:
    print _("Error fetching page")
    sys.exit(1)

p = lswww.linkParser(url)
try:
    p.feed(htmlSource)
except HTMLParser.HTMLParseError, err:
    # Let BeautifulSoup repair the markup, then retry the parser once.
    htmlSource = BeautifulSoup.BeautifulSoup(htmlSource).prettify()
    try:
        p.reset()
        p.feed(htmlSource)
    except HTMLParser.HTMLParseError, err:
        pass

# lc comes from the surrounding module (not shown here).
lc.add(fd, htmlSource)

if len(p.forms) == 0:
    print _("No forms found in this page !")
    sys.exit(1)
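# A minimal, self-contained sketch of the fetch-and-parse pattern shared by
# the three snippets above: download the page, feed it to an HTMLParser
# subclass, and on HTMLParseError retry once on a BeautifulSoup-prettified
# copy. FormFinder and fetch_forms are hypothetical names, not part of lswww;
# Python 2 is assumed, matching the snippets.
import urllib2
import HTMLParser
import BeautifulSoup


class FormFinder(HTMLParser.HTMLParser):
    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.forms = []

    def handle_starttag(self, tag, attrs):
        if tag == "form":
            self.forms.append(dict(attrs))


def fetch_forms(url, timeout=6):
    fd = urllib2.urlopen(url, timeout=timeout)
    htmlSource = fd.read()
    parser = FormFinder()
    try:
        parser.feed(htmlSource)
    except HTMLParser.HTMLParseError:
        # Second chance: let BeautifulSoup normalise the markup first.
        htmlSource = BeautifulSoup.BeautifulSoup(htmlSource).prettify()
        parser.reset()
        parser.feed(htmlSource)
    return parser.forms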