Ejemplo n.º 1
0
def readURLFile(filename):
    """Print a validity and uniqueness report for every line of a URL file.

    For each line read from ``filename`` this prints, in order: the source
    line, the result of ``url_validate``, the result of ``url_canonicalize``,
    whether the source line occurs exactly once in the file, and whether its
    canonical form occurs exactly once in the list returned by
    ``buildCanonicalList``.

    Args:
        filename: Path of the text file to read (one URL per line).
    """
    # Read in list of urls from file.  The context manager guarantees the
    # handle is closed; readlines() keeps each line's terminator, so the
    # helpers below receive the lines exactly as they appear in the file.
    with open(filename) as url_file:
        url_list = url_file.readlines()

    # Build a list of canonicalized versions of the urls
    canon_url_list = buildCanonicalList(url_list)

    # Print out url information.  The single-argument parenthesized print
    # form produces the same output under Python 2 and Python 3.
    for url in url_list:
        print("Source:\n" + url)
        print("Valid: " + str(url_validate(url)))
        # Canonicalize once and reuse the result for both report lines.
        canonical = url_canonicalize(url)
        print("Canonical: " + canonical)
        print("Source unique: " + str(url_list.count(url) == 1))
        print("Canonicalized URL unique: " + str(canon_url_list.count(canonical) == 1))
Ejemplo n.º 2
0
def readURLFile(filename):
    """Read URLs from a file and print a per-URL report.

    The report for each line contains the source text, the ``url_validate``
    result, the ``url_canonicalize`` result, and two uniqueness flags: one for
    the raw line within the file and one for its canonical form within the
    list produced by ``buildCanonicalList``.

    Args:
        filename: Path of the text file to read (one URL per line).
    """
    # Read in list of urls from file; ``with`` closes the handle even when
    # reading raises.  Line terminators are retained by readlines().
    with open(filename) as source:
        url_list = source.readlines()

    # Build a list of canonicalized versions of the urls
    canon_url_list = buildCanonicalList(url_list)

    # Print out url information (parenthesized single-argument prints behave
    # identically on Python 2 and Python 3).
    for url in url_list:
        print("Source:\n" + url)
        print("Valid: " + str(url_validate(url)))
        # Compute the canonical form a single time per URL.
        canonical_url = url_canonicalize(url)
        print("Canonical: " + canonical_url)
        print("Source unique: " + str(url_list.count(url) == 1))
        print("Canonicalized URL unique: " + str(
            canon_url_list.count(canonical_url) == 1))
Ejemplo n.º 3
0
 def runTest(self):
     """Check that ``url_validate(value)`` equals ``expected``.

     ``value`` and ``expected`` are not parameters; they are resolved from a
     scope outside this method that is not visible here.

     Raises:
         AssertionError: carrying ``(expected, value, actual)`` on mismatch.
     """
     # Evaluate once so the failure message reports the exact value that was
     # compared, rather than the result of a second call.
     actual = url_validate(value)
     assert actual == expected, (expected, value, actual)