Ejemplo n.º 1
0
    def test_sourceUnique(self):
        """Exercise UrlComparator.isSourceUnique with and without an identical entry."""
        candidate = 'www.google.com'

        # None of these strings is identical to the candidate; the test
        # expects the candidate to be reported as unique.
        without_match = ['google.com', 'http://google.com']
        unique_result = UrlComparator.isSourceUnique(candidate, without_match)
        self.assertTrue(unique_result)

        # The first entry is identical to the candidate; the test expects
        # the candidate to be reported as not unique.
        with_match = ['www.google.com', 'something.net']
        duplicate_result = UrlComparator.isSourceUnique(candidate, with_match)
        self.assertFalse(duplicate_result)
Ejemplo n.º 2
0
 def test_sourceUnique(self):
     """Check isSourceUnique for a URL absent from, then present in, a list."""
     source = 'www.google.com'

     # List without an identical string: expected to be unique.
     self.assertTrue(
         UrlComparator.isSourceUnique(source, ['google.com', 'http://google.com'])
     )
     # List containing the identical string: expected not to be unique.
     self.assertFalse(
         UrlComparator.isSourceUnique(source, ['www.google.com', 'something.net'])
     )
Ejemplo n.º 3
0
 # For every non-blank entry in `urls`, print whether it validates, its
 # canonical form, and whether it is unique among the remaining entries.

 # Filter out empty / whitespace-only strings. Wrapping in list() keeps
 # `urls` sliceable on Python 3, where filter() returns an iterator.
 urls = list(filter(lambda s: s.strip(), urls))

 # process each url
 for url in urls:
     # A new validator per URL; the same instance is handed to the
     # canonicalizer below.
     uv = UrlValidator()
     isValid = uv.validate(url)

     # Copy the list and remove one occurrence of the current URL, so any
     # duplicate of it is still present in the comparison list.
     wo_url_in_urls = urls[:]
     wo_url_in_urls.remove(url)

     # Defaults used when the URL is not valid.
     normURL = None
     isSrcUnique = UrlComparator.isSourceUnique(url, wo_url_in_urls)
     isNormUnique = None

     # Canonical form and canonical uniqueness are only computed for valid URLs.
     if isValid:
         uc = UrlCanonicalizer()
         normURL = uc.canonicalizerValidator(uv)
         isNormUnique = UrlComparator.isNormalizeUnique(url, wo_url_in_urls, False)

     # Single-argument print(...) produces the same output on Python 2 and 3.
     print('Source: ' + url)
     print('Valid: ' + str(isValid))
     print('Canonical: ' + ('None' if normURL is None else normURL))
     print('Source unique: ' + str(isSrcUnique))
     print('Canonicalized URL unique: ' + ('N/A' if isNormUnique is None else str(isNormUnique)))

     # Blank line between reports.
     print('')