Example #1
0
    def Run(self):
        """Open a window for ``self.url``, parse TEST_HTML into it and dump the result.

        Side effects:
            * sets ``self.win`` to a new ``browser.HtmlWindow`` for ``self.url``;
            * sets ``self.doc`` to the result of parsing ``TEST_HTML`` with the
              window's document passed as second argument;
            * overwrites ``tmall_item.html`` in the current working directory
              with ``str(self.win.doc)``.
        """
        self.win = browser.HtmlWindow(self.url, None)
        self.doc = w3c.parseString(TEST_HTML, self.win.doc)

        # Onload handlers are deliberately not fired here; the call below was
        # already disabled in the original code.
        # self.win.fireOnloadEvents()

        # The context manager guarantees the handle is closed even when
        # str() or write() raises, and avoids shadowing the ``file`` builtin.
        with open('tmall_item.html', 'w') as dump:
            dump.write(str(self.win.doc))
Example #2
0
	def __init__(self, url_or_dom, charset=None, headers=None, body=None, timeout=5):
		"""Create a JavaScript-enabled window from a Document, markup or a URL.

		Args:
			url_or_dom: one of
				* a ``Document`` instance, used as the DOM directly;
				* a string starting with ``<``, parsed as markup;
				* anything else is treated as a URL (``http://`` is prepended
				  when no scheme is present) and fetched.
			charset: fallback character set for decoding a fetched body. A
				charset announced in the Content-Type response header takes
				precedence; ``utf-8`` is used when neither is available.
			headers: optional dict of request headers. Its ``User-Agent``
				entry is also used to pick the navigator object.
			body: optional dict of form fields, url-encoded and sent as the
				request payload when non-empty.
			timeout: value installed as the default socket timeout (seconds).

		When the response Content-Type mentions ``x-javascript`` or ``json``,
		the decoded body is executed as ``window.data=<body>`` once the
		runtime modules are loaded.
		"""
		# None defaults avoid sharing one mutable dict between every call.
		headers = {} if headers is None else headers
		body = {} if body is None else body

		# NOTE: this changes the process-wide default socket timeout.
		urllib2.socket.setdefaulttimeout(timeout)
		jsonp = False

		if isinstance(url_or_dom, Document):
			url = "localhost:document"
			dom = url_or_dom

		elif url_or_dom.startswith('<'):
			url = "localhost:string"
			dom = parseString(url_or_dom)

		else:  # url
			url = url_or_dom
			if not re.match(r'\w+\:\/\/', url):
				url = "http://" + url

			# urllib2 issues a POST as soon as data is not None, so only
			# attach a payload when the caller actually supplied one.
			data = urllib.urlencode(body) if body else None
			request = urllib2.Request(url, data, headers=headers)
			# Open the prepared request (not the bare url) so the caller's
			# headers and body are really sent.
			response = urllib2.urlopen(request)
			try:
				contentType = response.headers.get('Content-Type')
				encoding = response.headers.get('Content-Encoding')
				html = response.read()
			finally:
				response.close()

			if contentType:
				if re.search(r'x-javascript|json', contentType):
					jsonp = True
				m = re.match(r'^.*;\s*charset=(.*)$', contentType)
				if m:
					# Header values may be quoted or padded; an unknown
					# codec name would make unicode() raise LookupError.
					charset = m.group(1).strip().strip('"\'')

			if not charset:
				charset = 'utf-8'  # default charset

			if encoding == 'gzip':
				html = gzip.GzipFile(fileobj=StringIO(html)).read()

			# Keep the raw (decompressed, still encoded) bytes around.
			self.__html__ = html
			html = unicode(html, encoding=charset, errors='ignore')
			dom = parseString(html)

		navigator = browser.matchNavigator(headers.get('User-Agent') or '')

		browser.HtmlWindow.__init__(self, url, dom, navigator)
		CommonJS.__init__(self)

		self.console = JSConsole(self._js_logger)

		# Preload the runtime modules, in this order.
		for module in "base, array.h, function.h, helper.h, object.h, string.h, date.h, custevent, selector, dom_retouch".split(","):
			self.execute(self.require, [module.strip()])

		if jsonp:
			# NOTE(review): this runs the remote response body as script
			# inside the JS runtime - only use with trusted endpoints.
			code = "window.data=" + html.encode('utf-8')
			self.execute(code)

		self._js_logger.info('JavaScript runtime ready.')
Example #3
0
 def setUp(self):
     """Parse TEST_HTML and wrap the resulting document in an HtmlWindow."""
     parsed = w3c.parseString(TEST_HTML)
     self.doc = parsed
     self.win = HtmlWindow(TEST_URL, parsed)
Example #4
0
</head>
<body onload='load()'>
<div id="id1">123</div>
    <frame src="#"/>
    <iframe src="#"/>
    <script>
    function load()
    {
        alert('onload');
    }
    document.write("<p id='hello'>world</p>");
    </script>
</body>
</html>
"""
# Module-level fixture: TEST_HTML is parsed and wrapped in a window at import
# time, so `doc` and `win` are available to anything that imports this module.
doc = w3c.parseString(TEST_HTML)
win = HtmlWindow(TEST_URL, doc)
# Disabled manual checks, kept for reference:
# doc.getElementById('id1')

# print win.evalScript("(function(){return document.getElementById('id1')})()")


class HtmlWindowTest(unittest.TestCase):
    """Checks the self-referential properties exposed by HtmlWindow."""

    def setUp(self):
        """Parse TEST_HTML and build a fresh window around it for each test."""
        self.doc = w3c.parseString(TEST_HTML)
        self.win = HtmlWindow(TEST_URL, self.doc)

    def testWindow(self):
        """``document`` is the wrapped DOM; ``window`` and ``self`` are the window."""
        # assertEqual instead of assertEquals: the latter is a deprecated
        # alias that no longer exists in Python 3.12+.
        self.assertEqual(self.doc, self.win.document)
        self.assertEqual(self.win, self.win.window)
        self.assertEqual(self.win, self.win.self)
Example #5
0
 def setUp(self):
     """Build the per-test fixture: a parsed TEST_HTML document and its window."""
     document = w3c.parseString(TEST_HTML)
     window = HtmlWindow(TEST_URL, document)
     self.doc = document
     self.win = window
Example #6
0
    def __init__(self,
                 url_or_dom,
                 charset=None,
                 headers=None,
                 body=None,
                 timeout=2):
        """Create a JavaScript-enabled window from a Document, markup or a URL.

        Args:
            url_or_dom: one of
                * a ``Document`` instance, used as the DOM directly;
                * a string starting with ``<``, parsed as markup;
                * anything else is treated as a URL (``http://`` is prepended
                  when no scheme is present) and fetched.
            charset: fallback character set for decoding a fetched body. A
                charset announced in the Content-Type response header takes
                precedence; ``utf-8`` is used when neither is available.
            headers: optional dict of request headers. Its ``User-Agent``
                entry is also passed to ``browser.Navigator``.
            body: optional dict of form fields, url-encoded and sent as the
                request payload when non-empty.
            timeout: value installed as the default socket timeout (seconds).

        When the response Content-Type mentions ``x-javascript`` or ``json``,
        the decoded body is executed as ``window.data=<body>``.
        """
        # None defaults avoid sharing one mutable dict between every call.
        headers = {} if headers is None else headers
        body = {} if body is None else body

        # NOTE: this changes the process-wide default socket timeout.
        urllib2.socket.setdefaulttimeout(timeout)
        jsonp = False

        if isinstance(url_or_dom, Document):
            url = "localhost:document"
            dom = url_or_dom

        elif url_or_dom.startswith("<"):
            url = "localhost:string"
            dom = parseString(url_or_dom)

        else:  # url
            url = url_or_dom
            if not re.match(r"\w+\:\/\/", url):
                url = "http://" + url

            # urllib2 issues a POST as soon as data is not None, so only
            # attach a payload when the caller actually supplied one.
            data = urllib.urlencode(body) if body else None
            request = urllib2.Request(url, data, headers=headers)
            # Open the prepared request (not the bare url) so the caller's
            # headers and body are really sent.
            # NOTE(review): the fetch uses a fixed 10 s timeout rather than
            # the ``timeout`` argument - confirm whether that is intended.
            response = urllib2.urlopen(request, timeout=10)
            try:
                contentType = response.headers.get("Content-Type")
                encoding = response.headers.get("Content-Encoding")
                html = response.read()
            finally:
                response.close()

            if contentType:
                if re.search(r"x-javascript|json", contentType):
                    jsonp = True
                m = re.match(r"^.*;\s*charset=(.*)$", contentType)
                if m:
                    # Header values may be quoted or padded; an unknown
                    # codec name would make unicode() raise LookupError.
                    charset = m.group(1).strip().strip("\"'")

            if not charset:
                charset = "utf-8"  # default charset

            if encoding == "gzip":
                html = gzip.GzipFile(fileobj=StringIO(html)).read()

            # Keep the raw (decompressed, still encoded) bytes around.
            self.__html__ = html
            html = unicode(html, encoding=charset, errors="ignore")
            dom = parseString(html)

        navigator = browser.Navigator(headers.get("User-Agent") or "")

        browser.HtmlWindow.__init__(self, url, dom, navigator)
        CommonJS.__init__(self)

        self.console = JSConsole(self._js_logger)

        # NOTE(review): preloading of the runtime modules is disabled here;
        # the original loop is kept below for reference.
        # for module in "base, array.h, function.h, helper.h, object.h, string.h, date.h, custevent, selector, dom_retouch".split(","):
        #     self.execute(self.require, [module.strip()])

        if jsonp:
            # NOTE(review): this runs the remote response body as script
            # inside the JS runtime - only use with trusted endpoints.
            code = "window.data=" + html.encode("utf-8")
            self.execute(code)

        self._js_logger.info("JavaScript runtime ready.")