Ejemplo n.º 1
0
 def test_removeWWWdot(self):
     """Check that a leading ``www.`` is dropped from the host.

     The sample URL also contains duplicate slashes and a ``..`` segment,
     so the expected value reflects that path clean-up as well.
     """
     # Each case pairs an input URL with its expected canonical form so the
     # two can never drift out of alignment.
     cases = [
         ('http://www.google.com//path//..///path////////////',
          'http://google.com/path/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 2
0
 def test_addtrailingslash(self):
     """Check that the canonical form of a path ends with a slash.

     The second case additionally carries user info, a port, a mixed-case
     host, percent-encoded characters and a fragment; none of those appear
     in the expected value.
     """
     # Each case pairs an input URL with its expected canonical form so the
     # two can never drift out of alignment.
     cases = [
         ('http://google.com/path',
          'http://google.com/path/'),
         ('http://*****:*****@en.wIkipediA.org:0/wiki/Unit_testing/%4f%4F#Language-',
          'http://en.wikipedia.org/wiki/Unit_testing/OO/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 3
0
 def test_removeUserPassword(self):
     """Check that ``user:password@`` credentials are stripped from a URL.

     Both cases also carry a port and percent-encoded characters; the
     expected values contain neither the credentials nor the port.
     """
     # NOTE(review): the first fixture had been mangled by an e-mail
     # obfuscation filter (it contained the literal text
     # '[email protected]').  The expected value is restored from the
     # percent-encoded path (%40 -> '@', %2e -> '.').  The password and
     # the exact host of the input could not be recovered, so
     # 'password@google.com' is a reconstruction -- confirm against the
     # upstream test data.
     cases = [
         ('hunlan:password@google.com:80/hunlan%40gmail%2ecom',
          'google.com/hunlan@gmail.com/'),
         ('http://*****:*****@en.wIkipediA.org:0/wiki/Unit_testing/%4f%4F#Language-',
          'http://en.wikipedia.org/wiki/Unit_testing/OO/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 4
0
 def test_lowercaseHostName(self):
     """Check that mixed-case host names come back in lower case.

     The cases also exercise ``www.`` removal, fragment removal and the
     trailing slash, as reflected in the expected values.
     """
     # Each case pairs an input URL with its expected canonical form so the
     # two can never drift out of alignment.
     cases = [
         ('www.GoOgLE.com',
          'google.com/'),
         ('http://en.wIkipediA.org/wiki/Unit_testing#Language-',
          'http://en.wikipedia.org/wiki/Unit_testing/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 5
0
 def test_removeDupSlashes(self):
     """Check that runs of slashes in the path collapse to a single slash.

     Both cases also contain a parent-directory segment (literal ``..`` or
     percent-encoded ``%2e%2e``) that is resolved in the expected values.
     """
     # Each case pairs an input URL with its expected canonical form so the
     # two can never drift out of alignment.
     cases = [
         ('http://google.com//path//..///path////////////',
          'http://google.com/path/'),
         ('http://*****:*****@en.wIkipediA.org:0//wiki/Unit_testing/%2e%2e#Language-',
          'http://en.wikipedia.org/wiki/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 6
0
 def test_wikiexample(self):
     """Check that URLs differing only in their fragment canonicalize alike.

     Both inputs point at the same Wikipedia page with different
     ``#fragment`` parts and share one expected canonical URL.
     """
     # Each case pairs an input URL with its expected canonical form so the
     # two can never drift out of alignment.
     cases = [
         ('http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations',
          'http://en.wikipedia.org/wiki/Unit_testing/'),
         ('http://en.wikipedia.org/wiki/Unit_testing#Language-',
          'http://en.wikipedia.org/wiki/Unit_testing/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 7
0
 def test_decodePercentEncoding(self):
     """Check that percent-encoded characters in the path are decoded.

     The escapes used here appear in both lower and upper case hex
     (``%2e``, ``%4f``, ``%4F``).
     """
     # NOTE(review): the first expected value had been mangled by an e-mail
     # obfuscation filter (it contained the literal text
     # '[email protected]').  It is restored here from the input path:
     # %40 -> '@' and %2e -> '.', giving 'hunlan@gmail.com'.
     cases = [
         ('www.GoOgLE.com/hunlan%40gmail%2ecom',
          'google.com/hunlan@gmail.com/'),
         ('cs.washington.edu/%43%53%45%34%30%33',
          'cs.washington.edu/CSE403/'),
         ('http://en.wIkipediA.org/wiki/Unit_testing/%4f%4F#Language-',
          'http://en.wikipedia.org/wiki/Unit_testing/OO/'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))
Ejemplo n.º 8
0
 def test_sortAndUseAndSignForQuery(self):
     """Check query-string normalization: sorted and joined with ``&``.

     The inputs separate parameters with ``;`` (and one ``&``) and list
     them out of order, with some upper-case keys; the expected values use
     ``&`` only, lower-case keys, and sorted order.
     """
     # Each case pairs an input URL with its expected canonical form so the
     # two can never drift out of alignment.
     cases = [
         ('www.nba.com?a=0;A=1;a=d',
          'nba.com/?a=0&a=1&a=d'),
         ('http://google.com//path//..///path////////////?b=2;a=1',
          'http://google.com/path/?a=1&b=2'),
         ('http://*****:*****@en.wIkipediA.org:0//wiki/Unit_testing/%2e%2e?a=0;c=1&B=2#Language-',
          'http://en.wikipedia.org/wiki/?a=0&b=2&c=1'),
     ]

     for url, expected in cases:
         # A fresh canonicalizer is created for every URL.
         uc = UrlCanonicalizer()
         actual = uc.canonicalizeUrl(url)
         # %-formatting keeps the message buildable even when ``actual`` is
         # not a str (e.g. None), so the assertion failure itself is
         # reported instead of a TypeError from string concatenation.
         self.assertEqual(expected, actual,
                          'fail on url: %s\nexpected: %s\nactual  : %s'
                          % (url, expected, actual))