Esempio n. 1
0
# Reduce the URL to its bare location: cut at the first "#", then at the
# first "?".
current = url.partition("#")[0].partition("?")[0]
# Everything before the last "/" of that location, plus a trailing slash.
currentdir = current.rpartition("/")[0] + "/"
# Text preceding the first "://" (the whole URL when there is none).
proto = url.partition("://")[0]

# Request headers: send an MSIE 5.5 user agent string.
txheaders = {
    'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
}

# Fetch the page through a requests session that uses `proxy`.
session = requests.Session()
session.proxies = proxy
r = session.get(url, headers=txheaders)

htmlSource = r.text
bs = BeautifulSoup.BeautifulSoup(htmlSource)

# Parse the page; every parse failure moves on to an alternative strategy.
p = lswww.linkParser(url)
try:
    p.feed(htmlSource)
except HTMLParser.HTMLParseError as err:
    # First fallback: reset the same parser and feed it the markup as
    # re-serialised by BeautifulSoup.
    htmlSource = bs.prettify()
    try:
        p.reset()
        p.feed(htmlSource)
    except HTMLParser.HTMLParseError as err:
        # Second fallback: hand the prettified markup to linkParser2.
        # An error raised here is not caught.
        p = lswww.linkParser2(url)
        p.feed(htmlSource)

# Pass the cookies received with the response on to `jc`
# (defined outside this excerpt).
jc.addcookies(r.cookies)

# Report when the parser collected no forms.
if len(p.forms) == 0:
    print(_("No forms found in this page !"))
Esempio n. 2
0
# Text preceding the first "://" of the URL (the whole URL when absent).
proto = url.partition("://")[0]
# Header dictionary with an MSIE 5.5 user agent string; it is not attached
# to the request built below.
agent = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

req = urllib2.Request(url)
# Sockets created from here on get a 6 second default timeout.
socket.setdefaulttimeout(6)

# Open the URL; an IOError ends the script with exit status 1.
try:
  fd = urllib2.urlopen(req)
except IOError:
  print("Error getting url")
  sys.exit(1)

# Read the body; only a socket timeout is turned into a clean exit.
try:
  htmlSource = fd.read()
except socket.timeout:
  print("Error fetching page")
  sys.exit(1)

p = lswww.linkParser()
try:
  p.feed(htmlSource)
except HTMLParser.HTMLParseError as err:
  # Retry on tidy-processed markup, but only when `tidyhere` equals 1.
  if tidyhere == 1:
    options = {
      'output_xhtml': 1,
      'add_xml_decl': 1,
      'indent': 1,
      'tidy_mark': 0,
    }
    htmlSource = str(tidy.parseString(htmlSource, **options))
    try:
      p.reset()
      p.feed(htmlSource)
    except HTMLParser.HTMLParseError as err:
      # A second parse failure is ignored; execution continues with
      # whatever the parser holds at this point.
      pass

# Without any form there is nothing left to do: report and exit.
if len(p.forms) == 0:
  print("No forms found in this page !")
  sys.exit(1)
Esempio n. 3
0
req = urllib2.Request(url)
# Sockets created from here on get a 6 second default timeout.
socket.setdefaulttimeout(6)

# Open the URL; an IOError ends the script with exit status 1.
try:
  fd = urllib2.urlopen(req)
except IOError:
  print(_("Error getting url"))
  sys.exit(1)

# Read the body; only a socket timeout is turned into a clean exit.
try:
  htmlSource = fd.read()
except socket.timeout:
  print(_("Error fetching page"))
  sys.exit(1)

p = lswww.linkParser(url)
try:
  p.feed(htmlSource)
except HTMLParser.HTMLParseError as err:
  # Fallback: let BeautifulSoup re-serialise the markup, reset the parser
  # and feed it the result.
  htmlSource = BeautifulSoup.BeautifulSoup(htmlSource).prettify()
  try:
    p.reset()
    p.feed(htmlSource)
  except HTMLParser.HTMLParseError as err:
    # A second parse failure is ignored; execution continues with
    # whatever the parser holds at this point.
    pass

# Hand the response object and the (possibly prettified) source to `lc`
# (defined outside this excerpt).
lc.add(fd, htmlSource)

# Without any form there is nothing left to do: report and exit.
if len(p.forms) == 0:
  print(_("No forms found in this page !"))
  sys.exit(1)