def parse(self, data):
    """Find the "XLS" link in an HTML page, download it, and read the file.

    The page is parsed with html5lib. The first ``<a>`` element whose text
    is exactly ``XLS`` supplies the download link; its ``href`` is resolved
    against ``self.url``, fetched with ``httpc.get_`` and the response body
    is written to ``self.filename``.

    :param data: raw HTML of the page (passed to ``chardet.detect`` and
        wrapped in ``StringIO`` for the parser).
    :returns: the result of ``self.readFile(self.filename)``. The file is
        read even when no ``XLS`` link was found, in which case this call
        did not write anything to it.
    """
    treebuilder = treebuilders.getTreeBuilder("etree", etree)
    parser = html5parser.HTMLParser(tree=treebuilder)

    # chardet may return an empty result or one without an 'encoding' key;
    # fall back to None so the check below never hits an unbound name.
    detected = chardet.detect(data)
    encoding = detected.get('encoding') if detected else None

    if encoding is None:
        document = parser.parse(StringIO(data))
    else:
        document = parser.parse(StringIO(data), encoding=encoding)

    # Locate the download URL.
    links = document.xpath('//a[text()="XLS"]')
    if links:
        url = urljoin(self.url, links[0].attrib['href'])
        # NOTE(review): the ok list includes error statuses (404, 500, ...)
        # and the body is written regardless of the status returned --
        # confirm that saving an error page to self.filename is intended.
        _status, _headers, body = httpc.get_(
            url, ok=[500, 501, 300, 301, 503, 200, 404, 405])
        # The context manager closes the file even if the write fails.
        with open(self.filename, 'wb') as out:
            out.write(body)
    return self.readFile(self.filename)
Example #2
0
 def test_get_(self):
     """Check the status, ``x-get`` header and body returned by ``httpc.get_``.

     Requests ``self.base_url() + 'hello'`` and expects status 200, an
     ``x-get`` header equal to ``'hello'`` and the body ``'hello world'``.
     """
     status, msg, body = httpc.get_(self.base_url() + 'hello')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(status, 200)
     self.assertEqual(msg.dict['x-get'], 'hello')
     self.assertEqual(body, 'hello world')
 def get(self, url):
     """Fetch ``url`` through ``httpc.get_`` and return only the body.

     The status and headers returned alongside the body are discarded.
     The ``ok`` list is handed to ``httpc.get_`` unchanged; presumably it
     names the status codes accepted without raising -- confirm against
     httpc.
     """
     accepted = [500, 501, 300, 301, 503, 200, 404, 405]
     response = httpc.get_(url, ok=accepted)
     _status, _headers, content = response
     return content
Example #4
0
 def test_get_(self):
     """Check the status, ``x-get`` header and body returned by ``httpc.get_``.

     Requests ``self.base_url() + 'hello'`` and expects status 200, an
     ``x-get`` header equal to ``'hello'`` and the body ``'hello world'``.
     """
     status, msg, body = httpc.get_(self.base_url() + 'hello')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(status, 200)
     self.assertEqual(msg.dict['x-get'], 'hello')
     self.assertEqual(body, 'hello world')