def readURLFile(filename):
    """Print a validity and uniqueness report for every line of a URL file.

    For each line read from ``filename`` this prints the raw line, the
    result of ``url_validate``, the result of ``url_canonicalize``, and
    whether the raw line and its canonical form each occur exactly once
    among all lines of the file.

    Args:
        filename: Path of the text file to read; every line is treated as
            one URL (lines keep their trailing newline, as ``readlines``
            returns them).
    """
    # Function-scope import so the block does not depend on the file header.
    from collections import Counter

    # The context manager closes the file handle; the original leaked it.
    with open(filename) as url_file:
        url_list = url_file.readlines()

    # Build a list of canonicalized versions of the urls. It is
    # index-aligned with url_list, so it is reused below instead of
    # canonicalizing every URL two more times.
    canon_url_list = buildCanonicalList(url_list)

    # Count occurrences once up front rather than calling list.count()
    # for every URL inside the loop (which is quadratic).
    source_counts = Counter(url_list)
    canon_counts = Counter(canon_url_list)

    # Print out url information. Single-argument print(...) produces the
    # same output under the Python 2 print statement and Python 3.
    for url, canon_url in zip(url_list, canon_url_list):
        print("Source:\n" + url)
        print("Valid: " + str(url_validate(url)))
        print("Canonical: " + canon_url)
        print("Source unique: " + str(source_counts[url] == 1))
        print("Canonicalized URL unique: " + str(canon_counts[canon_url] == 1))
Example #2
0
def readURLFile(filename):
    """Print a validity and uniqueness report for every line of a URL file.

    For each line read from ``filename`` this prints the raw line, the
    result of ``url_validate``, the result of ``url_canonicalize``, and
    whether the raw line and its canonical form each occur exactly once
    among all lines of the file.

    Args:
        filename: Path of the text file to read; every line is treated as
            one URL (lines keep their trailing newline, as ``readlines``
            returns them).
    """
    # Function-scope import so the block does not depend on the file header.
    from collections import Counter

    # The context manager closes the file handle; the original leaked it.
    with open(filename) as url_file:
        url_list = url_file.readlines()

    # Build a list of canonicalized versions of the urls. It is
    # index-aligned with url_list, so it is reused below instead of
    # canonicalizing every URL two more times.
    canon_url_list = buildCanonicalList(url_list)

    # Count occurrences once up front rather than calling list.count()
    # for every URL inside the loop (which is quadratic).
    source_counts = Counter(url_list)
    canon_counts = Counter(canon_url_list)

    # Print out url information. Single-argument print(...) produces the
    # same output under the Python 2 print statement and Python 3.
    for url, canon_url in zip(url_list, canon_url_list):
        print("Source:\n" + url)
        print("Valid: " + str(url_validate(url)))
        print("Canonical: " + canon_url)
        print("Source unique: " + str(source_counts[url] == 1))
        print("Canonicalized URL unique: " + str(canon_counts[canon_url] == 1))
Example #3
0
def buildCanonicalList(url_list):
    """Return a new list with ``url_canonicalize`` applied to each URL.

    The result has one entry per element of ``url_list``, in the same
    order; ``url_list`` itself is not modified.
    """
    return [url_canonicalize(source_url) for source_url in url_list]
def buildCanonicalList(url_list):
    """Map every entry of ``url_list`` through ``url_canonicalize``.

    Returns a freshly built list whose i-th element is the canonical form
    of the i-th input; the input sequence is left untouched.
    """
    canonical_forms = [url_canonicalize(raw_url) for raw_url in url_list]
    return canonical_forms
Example #5
0
 def runTest(self):
     """Check that ``url_canonicalize(original)`` equals ``normalized``.

     ``original`` and ``normalized`` are not parameters or locals; they are
     resolved from a surrounding scope that is not visible in this block.
     On failure the assertion carries the tuple
     ``(original, normalized, <canonicalized original>)``.
     """
     # Canonicalize once so the value reported on failure is exactly the
     # value that was compared, rather than the result of a second call.
     canonical = url_canonicalize(original)
     assert canonical == normalized, (original, normalized, canonical)
Example #6
0
 def runTest(self):
     """Check whether ``value`` is unchanged by ``url_canonicalize``.

     Asserts that the truth of ``url_canonicalize(value) == value`` matches
     ``expected``. ``value`` and ``expected`` are not parameters or locals;
     they are resolved from a surrounding scope that is not visible in this
     block. On failure the assertion carries the tuple
     ``(expected, value, <canonicalized value>)``.
     """
     # Canonicalize once so the value reported on failure is exactly the
     # value that was compared, rather than the result of a second call.
     canonical = url_canonicalize(value)
     assert (canonical == value) == expected, (expected, value, canonical)