def test_html_extractor(self):
    """Check whether html_extractor returns the HTML page for a URL.

    An empty URL must yield an empty string. For the test URL the result
    must be longer than three characters and begin with "<!do".
    NOTE(review): html_extractor presumably performs a live fetch, so this
    test needs network access -- confirm.
    """
    test_url = "http://www.google.com"
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertEqual(html_utilities.html_extractor(""), "")
    response = html_utilities.html_extractor(test_url)
    self.assertTrue(len(response) > 3)
    # Only the first four characters of the document are compared.
    self.assertEqual(response[0:4], "<!do")
def get(self):
    """Handle a GET request whose ``url`` parameter names the page to serve.

    The page text comes from ``html_extractor``. It is run through a
    ``JsReplaceDecorator`` wrapping a ``CSSReplaceDecorator``, both built
    with the first element of ``parse_url``'s result, and the decorated
    text is written to the response as ``text/html; charset=UTF-8``.
    """
    target_url = self.request.get('url')
    base_domain = parse_url(target_url)[0]
    page_text = html_extractor(target_url)

    # NOTE: an older approach that inserted a <script> tag for
    # /javascripts/replacing_urls.js right before </head> is disabled;
    # only the decorator chain below touches the page text.
    decorator = JsReplaceDecorator(base_domain,
                                   CSSReplaceDecorator(base_domain))

    self.response.headers['Content-Type'] = 'text/html; charset=UTF-8'
    decorated_text = decorator.decorate_text(page_text)
    self.response.out.write(decorated_text)
def get(self):
    """Serve the page named by the ``url`` GET parameter after decoration.

    Steps: read ``url`` from the request, take element 0 of
    ``parse_url(url)`` as the domain, obtain the page through
    ``html_extractor``, decorate it, and write the result out with a
    ``text/html; charset=UTF-8`` content type.
    """
    requested = self.request.get('url')
    site_domain = parse_url(requested)[0]
    html_source = html_extractor(requested)

    # Decorator chain: the CSS decorator is handed to the JS decorator.
    # (A previous variant that spliced a replacing_urls.js <script> tag
    # in front of </head> is not used here.)
    css_decorator = CSSReplaceDecorator(site_domain)
    chain = JsReplaceDecorator(site_domain, css_decorator)

    self.response.headers['Content-Type'] = 'text/html; charset=UTF-8'
    self.response.out.write(chain.decorate_text(html_source))
def test_encoding_issues(self):
    """Extract http://www.uol.com and expect more than 15 characters back.

    NOTE(review): presumably a regression test for a page whose encoding
    once broke html_extractor -- confirm.
    """
    page = html_utilities.html_extractor("http://www.uol.com")
    self.assertTrue(len(page) > 15)
def test_attribute_exception(self):
    """Check html_extractor against http://www.watinha.com.

    An empty URL must yield an empty string, and the test URL must yield
    more than three characters.
    NOTE(review): the name suggests this guards against an attribute
    error once raised for this site -- confirm.
    """
    test_url = "http://www.watinha.com"
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertEqual(html_utilities.html_extractor(""), "")
    response = html_utilities.html_extractor(test_url)
    self.assertTrue(len(response) > 3)
def get_css(self, url):
    """Return whatever ``html_extractor`` yields for ``url``."""
    css_text = html_extractor(url)
    return css_text