def readURLFile(filename):
    """Print a validity and uniqueness report for every line of a URL file.

    For each line read from ``filename`` the report shows the source line,
    the result of ``url_validate``, the canonical form produced by
    ``url_canonicalize``, and whether the source line and its canonical
    form each occur exactly once in the file.

    Args:
        filename: Path of the text file to read; each line is treated as
            one URL.

    Returns:
        None. The report is written to standard output.
    """
    # Close the file deterministically instead of leaving it to the
    # garbage collector.
    with open(filename) as url_file:
        # NOTE(review): readlines() keeps each trailing newline, so that is
        # what the helpers receive and what is printed -- confirm intended.
        url_list = url_file.readlines()

    # Canonicalize every URL once and reuse the results below.
    canon_url_list = buildCanonicalList(url_list)

    # Tally occurrences up front so each uniqueness check is a dict lookup
    # rather than a full list.count() scan per URL.
    source_counts = {}
    for url in url_list:
        source_counts[url] = source_counts.get(url, 0) + 1
    canon_counts = {}
    for canon_url in canon_url_list:
        canon_counts[canon_url] = canon_counts.get(canon_url, 0) + 1

    # A parenthesised single-argument print produces the same output whether
    # print is a statement or a function.
    for url, canon_url in zip(url_list, canon_url_list):
        print("Source:\n" + url)
        print("Valid: " + str(url_validate(url)))
        print("Canonical: " + canon_url)
        print("Source unique: " + str(source_counts[url] == 1))
        print("Canonicalized URL unique: " + str(canon_counts[canon_url] == 1))
def readURLFile(filename):
    """Print a validity and uniqueness report for every line of a URL file.

    For each line read from ``filename`` the report shows the source line,
    the result of ``url_validate``, the canonical form produced by
    ``url_canonicalize``, and whether the source line and its canonical
    form each occur exactly once in the file.

    Args:
        filename: Path of the text file to read; each line is treated as
            one URL.

    Returns:
        None. The report is written to standard output.
    """
    # Close the file deterministically instead of leaving it to the
    # garbage collector.
    with open(filename) as url_file:
        # NOTE(review): readlines() keeps each trailing newline, so that is
        # what the helpers receive and what is printed -- confirm intended.
        url_list = url_file.readlines()

    # Canonicalize every URL once and reuse the results below.
    canon_url_list = buildCanonicalList(url_list)

    # Tally occurrences up front so each uniqueness check is a dict lookup
    # rather than a full list.count() scan per URL.
    source_counts = {}
    for url in url_list:
        source_counts[url] = source_counts.get(url, 0) + 1
    canon_counts = {}
    for canon_url in canon_url_list:
        canon_counts[canon_url] = canon_counts.get(canon_url, 0) + 1

    # A parenthesised single-argument print produces the same output whether
    # print is a statement or a function.
    for url, canon_url in zip(url_list, canon_url_list):
        print("Source:\n" + url)
        print("Valid: " + str(url_validate(url)))
        print("Canonical: " + canon_url)
        print("Source unique: " + str(source_counts[url] == 1))
        print("Canonicalized URL unique: " + str(canon_counts[canon_url] == 1))
def buildCanonicalList(url_list):
    """Return a new list with the canonical form of each URL, in input order.

    Every element of ``url_list`` is passed through ``url_canonicalize``;
    the input sequence itself is not modified.
    """
    return [url_canonicalize(entry) for entry in url_list]
def runTest(self):
    # Checks that canonicalizing `original` yields `normalized`; both names
    # come from a scope outside this method (not visible here).
    # Canonicalize once so the failure tuple reports the exact value that
    # was compared rather than the result of a second call.
    canonical = url_canonicalize(original)
    assert canonical == normalized, (original, normalized, canonical)
def runTest(self):
    # Checks whether canonicalizing `value` leaves it unchanged, and that
    # this outcome matches `expected`; both names come from a scope outside
    # this method (not visible here).
    # Canonicalize once so the failure tuple reports the exact value that
    # was compared rather than the result of a second call.
    canonical = url_canonicalize(value)
    assert (canonical == value) == expected, (expected, value, canonical)