def test_all(self):
    """Check is_valid_url against one fully populated URL and one malformed one."""
    # Carries a scheme, host, port, path, query string and fragment.
    well_formed = "http://google.com:80/blah/blah?this=test&blah&blah=#icky"
    # Expected by this test to be rejected.
    malformed = "htt?p://google.z:800/bla()/?this is a = test#blah test"

    self.assertTrue(is_valid_url(well_formed))
    self.assertFalse(is_valid_url(malformed))
def test_with_port(self):
    """Check how is_valid_url treats the port part of a URL.

    A four-digit port is expected to be accepted; a trailing colon with
    no port and the port value 100000 are both expected to be rejected.
    """
    self.assertTrue(is_valid_url("http://google.com:8000"))

    # Checked in this order; the first failing assertion ends the test.
    for rejected_url in ("http://google.com:", "http://google.com:100000"):
        self.assertFalse(is_valid_url(rejected_url))
def test_scheme_and_domain_is_valid_url(self):
    """Check that URLs made of just a scheme and a domain are accepted.

    Covers plain http, http with a "www." host prefix, and https.
    """
    accepted_urls = (
        "http://google.com",
        "http://www.google.com",
        "https://google.com",
    )
    # Checked in this order; the first failing assertion ends the test.
    for candidate in accepted_urls:
        self.assertTrue(is_valid_url(candidate))
def test_empty(self):
    """Check that the empty string is not reported as a valid URL."""
    outcome = is_valid_url("")
    self.assertFalse(outcome)
def test_all(self):
    """Run is_valid_url on a URL using every component, then on a broken one."""
    # Scheme, host, port, path, query string and fragment together.
    accepted = is_valid_url(
        "http://google.com:80/blah/blah?this=test&blah&blah=#icky")
    self.assertTrue(accepted)

    # This input is expected to be reported as invalid.
    rejected = is_valid_url(
        "htt?p://google.z:800/bla()/?this is a = test#blah test")
    self.assertFalse(rejected)
try: lines = infile.read().splitlines() except IOError, e: parser.error(str(e)) finally: infile.close() infile = None source_counter = Counter(lines) canonical_lines = map(canonicalize, lines) canonical_counter = Counter(canonical_lines) for index in range(0, len(lines)): url = lines[index] curl = canonical_lines[index] valid = is_valid_url(url) print "Source: " + url print "Valid: " + ("true" if valid else "false") if valid: print "Canonical: " + curl print "Source unique: " + ("true" if source_counter[url] == 1 else "false") if valid: print "Canonicalized URL unique: " + ( "true" if canonical_counter[curl] == 1 else "false") # write output try: for line in lines: outfile.write(line) except IOError, e:
try: lines = infile.read().splitlines() except IOError, e: parser.error(str(e)) finally: infile.close() infile = None source_counter = Counter(lines) canonical_lines = map(canonicalize, lines) canonical_counter = Counter(canonical_lines) for index in range(0, len(lines)): url = lines[index] curl = canonical_lines[index] valid = is_valid_url(url) print "Source: " + url print "Valid: " + ("true" if valid else "false") if valid: print "Canonical: " + curl print "Source unique: " + ("true" if source_counter[url] == 1 else "false") if valid: print "Canonicalized URL unique: " + ("true" if canonical_counter[curl] == 1 else "false") # write output try: for line in lines: outfile.write(line) except IOError, e: parser.error(str(e)) finally: