def validate_url(url):
    """Check that ``url`` points to a final page and return its cleaned form.

    The page is fetched with Grab and counts as final when the text selected
    by ``//div[@class="col-sub"]/div[@class="prod-id"]/span/text()`` is
    non-empty.

    :param url: address of the page to check.
    :returns: the result of ``clean_url`` called with fragment, query and
        params all switched off.
    :raises InvalidDeepLink: if the marker is missing or its text is empty.
    """
    # TODO: remove parsing from here
    g = grab.Grab()
    g.go(url)
    # Without a default, ``.text()`` raises the selector's "not found" error
    # on an empty selection, so a non-final page would never reach the
    # InvalidDeepLink below. An empty-string default keeps that case falsy.
    marker_text = g.doc.select(
        '//div[@class="col-sub"]/div[@class="prod-id"]/span/text()'
    ).text(default='')
    if marker_text:
        return clean_url(url, fragment=False, query=False, params=False)
    raise InvalidDeepLink(url)
def validate_url(url):
    """Check that ``url`` points to a final page and return its cleaned form.

    The page body is downloaded, parsed with ``html.fromstring`` and counts
    as final when ``//span[@class="b-art__num"]/text()`` matches at least
    one text node.

    :param url: address of the page to check.
    :returns: the result of ``clean_url`` called with fragment, query and
        params all switched off.
    :raises InvalidDeepLink: if the marker is not found on the page.
    """
    # TODO: remove parsing from here
    # Close the session as soon as the body is read so it does not stay open
    # after the call.
    # NOTE(review): relies on Session supporting the context-manager protocol
    # (true for requests.Session) - confirm which Session is imported here.
    with Session() as session:
        res = session.get(url).text
    page = html.fromstring(res)
    if page.xpath('//span[@class="b-art__num"]/text()'):
        return clean_url(url, fragment=False, query=False, params=False)
    raise InvalidDeepLink(url)
def is_valid(url):
    """Check that ``url`` points to a final page and return its cleaned form.

    The page body is downloaded, parsed with ``html.fromstring`` and counts
    as final when
    ``//div[@class="col-sub"]/div[@class="prod-id"]/span/text()`` matches at
    least one text node.

    :param url: address of the page to check.
    :returns: the result of ``clean_url`` called with fragment, query and
        params all switched off.
    :raises InvalidDeepLink: if the marker is not found on the page.
    """
    # TODO: remove parsing from here
    # Close the session as soon as the body is read so it does not stay open
    # after the call.
    # NOTE(review): relies on Session supporting the context-manager protocol
    # (true for requests.Session) - confirm which Session is imported here.
    with Session() as session:
        res = session.get(url).text
    page = html.fromstring(res)
    # An empty match list is falsy, so no explicit comparison with [] is needed.
    if page.xpath('//div[@class="col-sub"]/div[@class="prod-id"]/span/text()'):
        return clean_url(url, fragment=False, query=False, params=False)
    raise InvalidDeepLink(url)
def validate_url(url):
    """Return the cleaned ``url`` when the page it points to is final.

    A Grab instance is passed through ``_change_city`` before the request.
    The page counts as final when it contains a ``<ul>`` whose class
    attribute includes ``product_info_switcher``.

    :param url: address of the page to check.
    :returns: the result of ``clean_url`` called with fragment, query and
        params all switched off.
    :raises InvalidDeepLink: if no such ``<ul>`` exists on the page.
    """
    # TODO: remove parsing from here
    browser = grab.Grab()
    _change_city(browser)
    browser.go(url)

    switcher = browser.doc.select('//ul[contains(@class,"product_info_switcher")]')
    if not switcher.exists():
        raise InvalidDeepLink(url)
    return clean_url(url, fragment=False, query=False, params=False)
def test_main(self):
    """Check ``clean_query`` results for several sets of key arguments."""
    # Each entry pairs the keys passed to clean_query with the URL expected back.
    cases = (
        (("a",), "http://example.com/path?a=1"),
        (("a", "c"), "http://example.com/path?a=1&c=3"),
        (("a", "b", "c"), "http://example.com/path?a=1&b=2&c=3"),
        (("a", "b", "c", "d"), self.url),
    )
    for keys, expected in cases:
        self.assertEqual(functions.clean_query(self.url, *keys), expected)

    # With no keys given the result must equal clean_url(url, query=False).
    without_keys = functions.clean_query(self.url)
    self.assertEqual(without_keys, "http://example.com/path")
    self.assertEqual(
        functions.clean_query(self.url),
        functions.clean_url(self.url, query=False),
    )
def test_clean_params(self):
    """``clean_url(..., params=False)`` must return the expected URL."""
    expected = "http://example.com/path/?a=1&b=2&c=3#fragment"
    actual = functions.clean_url(self.url, params=False)
    self.assertEqual(actual, expected)
def test_clean_query(self):
    """``clean_url(..., query=False)`` must return the expected URL."""
    expected = "http://example.com/path/;params#fragment"
    actual = functions.clean_url(self.url, query=False)
    self.assertEqual(actual, expected)
def test_clean_fragment(self):
    """``clean_url(..., fragment=False)`` must return the expected URL."""
    expected = "http://example.com/path/;params?a=1&b=2&c=3"
    actual = functions.clean_url(self.url, fragment=False)
    self.assertEqual(actual, expected)