def Run(self):
    """Open ``self.url`` in a browser window and dump its document to disk.

    Sets ``self.win`` to a new ``browser.HtmlWindow`` created for
    ``self.url`` and ``self.doc`` to the result of
    ``w3c.parseString(TEST_HTML, self.win.doc)``, then writes
    ``str(self.win.doc)`` to ``tmall_item.html`` in the current working
    directory, overwriting any existing file.
    """
    self.win = browser.HtmlWindow(self.url, None)
    self.doc = w3c.parseString(TEST_HTML, self.win.doc)
    # NOTE: onload handlers are not fired here; the fireOnloadEvents() call
    # was already disabled in the original code.

    # ``with`` closes the file even when str() or write() raises, and the
    # local name no longer shadows the Python 2 ``file`` builtin.
    with open('tmall_item.html', 'w') as dump:
        dump.write(str(self.win.doc))
def __init__(self, url_or_dom, charset=None, headers=None, body=None, timeout=5):
    """Build a JavaScript-enabled window from a DOM, a markup string or a URL.

    Args:
        url_or_dom: A ``Document`` instance, a markup string (anything that
            starts with ``<``) or a URL.  A URL without a scheme gets
            ``http://`` prepended before it is fetched.
        charset: Encoding used to decode a fetched response.  A
            ``charset=`` parameter in the response ``Content-Type`` header
            takes precedence; ``utf-8`` is used when neither is available.
        headers: Optional mapping of HTTP request headers.  Its
            ``User-Agent`` entry is also handed to
            ``browser.matchNavigator``.
        body: Optional mapping of form fields.  When non-empty it is
            url-encoded and sent as the request payload.
        timeout: Value passed to ``socket.setdefaulttimeout`` (seconds).
    """
    # Normalise here rather than using shared mutable ``{}`` defaults.
    if headers is None:
        headers = {}
    if body is None:
        body = {}

    urllib2.socket.setdefaulttimeout(timeout)
    jsonp = False

    if isinstance(url_or_dom, Document):
        url = "localhost:document"
        dom = url_or_dom
    elif url_or_dom.startswith('<'):
        url = "localhost:string"
        dom = parseString(url_or_dom)
    else:
        url = url_or_dom
        if not re.match(r'\w+\:\/\/', url):
            url = "http://" + url

        # Any non-None payload turns the request into a POST, so only
        # attach one when the caller actually supplied form fields.
        data = urllib.urlencode(body) if body else None
        request = urllib2.Request(url, data, headers=headers)
        # Open the Request object (not the bare URL) so that the headers
        # and body really reach the server.
        response = urllib2.urlopen(request)
        try:
            contentType = response.headers.get('Content-Type')
            encoding = response.headers.get('Content-Encoding')
            html = response.read()
        finally:
            response.close()

        if contentType:
            # A JavaScript/JSON payload is evaluated as data further down.
            if re.search(r'x-javascript|json', contentType):
                jsonp = True
            m = re.match(r'^.*;\s*charset=(.*)$', contentType)
            if m:
                # Tolerate quoted values such as charset="utf-8".
                charset = m.group(1).strip().strip('"\'')
        if not charset:
            charset = 'utf-8'  # default charset

        if encoding == 'gzip':
            html = gzip.GzipFile(fileobj=StringIO(html)).read()

        # Keep the raw (still encoded) payload around.
        self.__html__ = html
        try:
            html = unicode(html, encoding=charset, errors='ignore')
        except LookupError:
            # The server announced a charset Python does not know.
            html = unicode(html, encoding='utf-8', errors='ignore')
        dom = parseString(html)

    navigator = browser.matchNavigator(headers.get('User-Agent') or '')
    browser.HtmlWindow.__init__(self, url, dom, navigator)
    CommonJS.__init__(self)
    self.console = JSConsole(self._js_logger)

    # Load every listed module through self.require inside the runtime.
    for module in "base, array.h, function.h, helper.h, object.h, string.h, date.h, custevent, selector, dom_retouch".split(","):
        self.execute(self.require, [module.strip()])

    if jsonp:
        # Expose the fetched payload to scripts as ``window.data``.
        code = "window.data=" + html.encode('utf-8')
        self.execute(code)

    self._js_logger.info('JavaScript runtime ready.')
def setUp(self):
    """Parse ``TEST_HTML`` and wrap the result in an ``HtmlWindow`` fixture.

    Stores the parsed document on ``self.doc`` and the window created for
    ``TEST_URL`` on ``self.win``.
    """
    parsed = w3c.parseString(TEST_HTML)
    self.doc = parsed
    self.win = HtmlWindow(TEST_URL, parsed)
</head> <body onload='load()'> <div id="id1">123</div> <frame src="#"/> <iframe src="#"/> <script> function load() { alert('onload'); } document.write("<p id='hello'>world</p>"); </script> </body> </html> """ doc = w3c.parseString(TEST_HTML) win = HtmlWindow(TEST_URL, doc) # doc.getElementById('id1') # print win.evalScript("(function(){return document.getElementById('id1')})()") class HtmlWindowTest(unittest.TestCase): def setUp(self): self.doc = w3c.parseString(TEST_HTML) self.win = HtmlWindow(TEST_URL, self.doc) def testWindow(self): self.assertEquals(self.doc, self.win.document) self.assertEquals(self.win, self.win.window) self.assertEquals(self.win, self.win.self)
def __init__(self, url_or_dom, charset=None, headers=None, body=None, timeout=2):
    """Build a JavaScript-enabled window from a DOM, a markup string or a URL.

    Args:
        url_or_dom: A ``Document`` instance, a markup string (anything that
            starts with ``<``) or a URL.  A URL without a scheme gets
            ``http://`` prepended before it is fetched.
        charset: Encoding used to decode a fetched response.  A
            ``charset=`` parameter in the response ``Content-Type`` header
            takes precedence; ``utf-8`` is used when neither is available.
        headers: Optional mapping of HTTP request headers.  Its
            ``User-Agent`` entry is also handed to ``browser.Navigator``.
        body: Optional mapping of form fields.  When non-empty it is
            url-encoded and sent as the request payload.
        timeout: Value passed to ``socket.setdefaulttimeout`` (seconds).
            The page fetch itself uses a fixed 10 second timeout.
    """
    # Normalise here rather than using shared mutable ``{}`` defaults.
    if headers is None:
        headers = {}
    if body is None:
        body = {}

    urllib2.socket.setdefaulttimeout(timeout)
    jsonp = False

    if isinstance(url_or_dom, Document):
        url = "localhost:document"
        dom = url_or_dom
    elif url_or_dom.startswith("<"):
        url = "localhost:string"
        dom = parseString(url_or_dom)
    else:
        url = url_or_dom
        if not re.match(r"\w+\:\/\/", url):
            url = "http://" + url

        # Any non-None payload turns the request into a POST, so only
        # attach one when the caller actually supplied form fields.
        data = urllib.urlencode(body) if body else None
        request = urllib2.Request(url, data, headers=headers)
        # Open the Request object (not the bare URL) so that the headers
        # and body really reach the server.
        # NOTE(review): the 10 second timeout is hard-coded and overrides
        # the ``timeout`` argument for this call; kept as-is to preserve
        # the existing effective behaviour.
        response = urllib2.urlopen(request, timeout=10)
        try:
            contentType = response.headers.get("Content-Type")
            encoding = response.headers.get("Content-Encoding")
            html = response.read()
        finally:
            response.close()

        if contentType:
            # A JavaScript/JSON payload is evaluated as data further down.
            if re.search(r"x-javascript|json", contentType):
                jsonp = True
            m = re.match(r"^.*;\s*charset=(.*)$", contentType)
            if m:
                # Tolerate quoted values such as charset="utf-8".
                charset = m.group(1).strip().strip("\"'")
        if not charset:
            charset = "utf-8"  # default charset

        if encoding == "gzip":
            html = gzip.GzipFile(fileobj=StringIO(html)).read()

        # Keep the raw (still encoded) payload around.
        self.__html__ = html
        try:
            html = unicode(html, encoding=charset, errors="ignore")
        except LookupError:
            # The server announced a charset Python does not know.
            html = unicode(html, encoding="utf-8", errors="ignore")
        dom = parseString(html)

    navigator = browser.Navigator(headers.get("User-Agent") or "")
    browser.HtmlWindow.__init__(self, url, dom, navigator)
    CommonJS.__init__(self)
    self.console = JSConsole(self._js_logger)

    # NOTE: the preload of the bundled modules (base, array.h, function.h,
    # helper.h, object.h, string.h, date.h, custevent, selector,
    # dom_retouch) through self.require is disabled in this version.

    if jsonp:
        # Expose the fetched payload to scripts as ``window.data``.
        code = "window.data=" + html.encode("utf-8")
        self.execute(code)

    self._js_logger.info("JavaScript runtime ready.")