Exemple #1
0
class IPv4Test(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
        
    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['127.0.0.1:8000/urls',\
                '127.0.0.1']
        expected = [':8000/urls',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._checkAndRemoveIPv4(urls[i])
            self.assertEqual(expected[i], ret)
            
    # input: incorrect input
    # expected: None
    def test_illegalIPv4(self):
        urls = ['127.0.1.',\
                '127.0.1', \
                '0.0.0.256',\
                '0.0.0.-1',\
                'a.b.c.d',\
                '127.0.0.0.1',\
                '123',\
                '']
        for url in urls:
            ret = self.urlValidator._checkAndRemoveIPv4(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
Exemple #2
0
class DomainNameTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['www.google.com/images',\
                'google.com',\
                'wiki.org/info',\
                'cs.washington.edu/cse403']
        expected = ['/images',\
                    '',\
                    '/info',\
                    '/cse403']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isDomainNameValid(urls[i])
            self.assertEqual(expected[i], ret)

    # input: incorrect input
    # expected: None
    def test_illegalDomainName(self):
        urls = ['google/images',\
                'www.google.google.com',\
                'www.g**gle.com',\
                'cs.washington.ed/cse403',\
                '']
        for url in urls:
            ret = self.urlValidator._isDomainNameValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
Exemple #3
0
class IPv4Test(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['127.0.0.1:8000/urls',\
                '127.0.0.1']
        expected = [':8000/urls',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._checkAndRemoveIPv4(urls[i])
            self.assertEqual(expected[i], ret)

    # input: incorrect input
    # expected: None
    def test_illegalIPv4(self):
        urls = ['127.0.1.',\
                '127.0.1', \
                '0.0.0.256',\
                '0.0.0.-1',\
                'a.b.c.d',\
                '127.0.0.0.1',\
                '123',\
                '']
        for url in urls:
            ret = self.urlValidator._checkAndRemoveIPv4(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
Exemple #4
0
class UserPasswordTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
    # input: correct username/password
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['hunlan:[email protected]']
        expected = ['gmail.com']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isUserPasswordValid(urls[i])
            self.assertEqual(expected[i], ret)
            
    # input: incorrect username/password
    # expected: None
    def test_illegalUsernamePassword(self):
        urls = ['*****@*****.**',\
                ':[email protected]',\
                'hunlan:pass:[email protected]',\
                'hunlan:****@gmail.com',\
                '@gmail.com']
        for url in urls:
            ret = self.urlValidator._isUserPasswordValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
    
    # input: no username and password
    # expected: non-None
    def test_noUsernamePassword(self):
        urls = ['www.gmail.com']
        expected = ['www.gmail.com']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isUserPasswordValid(urls[i])
            self.assertEqual(expected[i], ret)      
Exemple #5
0
class DomainNameTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['www.google.com/images',\
                'google.com',\
                'wiki.org/info',\
                'cs.washington.edu/cse403']
        expected = ['/images',\
                    '',\
                    '/info',\
                    '/cse403']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isDomainNameValid(urls[i])
            self.assertEqual(expected[i], ret)
            
    # input: incorrect input
    # expected: None
    def test_illegalDomainName(self):
        urls = ['google/images',\
                'www.google.google.com',\
                'www.g**gle.com',\
                'cs.washington.ed/cse403',\
                '']
        for url in urls:
            ret = self.urlValidator._isDomainNameValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
Exemple #6
0
class SchemeTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
    # input: correct url
    # expected: non-None
    def test_basicCorrectSituation(self):
        valid_scheme_url = ['http://www.google.com', \
                            'https://www.google.com', \
                            'ftp://www.google.com']
        expected = ['www.google.com', \
                    'www.google.com', \
                    'www.google.com']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isSchemeNameValid(valid_scheme_url[i])
            self.assertEqual(expected[i], ret)
    
    # input: incorrect url
    # expected: original url
    def test_wrongSchemeType(self):
        invalid_scheme_url = ['htp://www.google.com', \
                              'www://www.google.com', \
                              'ftp:/www.google.com', \
                              'http//www.google.com']
        for url in invalid_scheme_url:
            ret = self.urlValidator._isSchemeNameValid(url)
            self.assertEqual(url, ret)
        
   # input: no scheme url
    # expected: original url  
    def test_inputWithoutSchemeType(self):
        no_scheme_url = ['', 'www.google.com']
        for url in no_scheme_url:
            ret = self.urlValidator._isSchemeNameValid(url)
            self.assertEqual(url, ret)
Exemple #7
0
class UserPasswordTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct username/password
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['hunlan:[email protected]']
        expected = ['gmail.com']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isUserPasswordValid(urls[i])
            self.assertEqual(expected[i], ret)

    # input: incorrect username/password
    # expected: None
    def test_illegalUsernamePassword(self):
        urls = ['*****@*****.**',\
                ':[email protected]',\
                'hunlan:pass:[email protected]',\
                'hunlan:****@gmail.com',\
                '@gmail.com']
        for url in urls:
            ret = self.urlValidator._isUserPasswordValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)

    # input: no username and password
    # expected: non-None
    def test_noUsernamePassword(self):
        urls = ['www.gmail.com']
        expected = ['www.gmail.com']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isUserPasswordValid(urls[i])
            self.assertEqual(expected[i], ret)
Exemple #8
0
class FragmentTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['#fragment',\
                '#_-_',\
                '#']
        expected = ['',\
                    '',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isFragmentValid(urls[i])
            self.assertEqual(expected[i], ret, 'wrong val at url: ' + urls[i])

    # input: incorrect input
    # expected: None
    def test_illegalPath(self):
        urls = ['##',\
                '#omg this is a fragment?', \
                '#www.google.com']
        for url in urls:
            ret = self.urlValidator._isFragmentValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)

    # input: no Fragment
    # expected: original url
    def test_noFragment(self):
        urls = ['']
        for url in urls:
            ret = self.urlValidator._isFragmentValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)
 def compareNormalizeUrl(urlA, urlB, raiseException=True):
     """Order two urls by their canonicalized forms.

     Both urls are validated first. If urlA fails validation, an Exception
     ('Invalid urlA') is raised, or -1 is returned when raiseException is
     false. If urlB fails validation, an Exception ('Invalid urlB') is
     raised, or 1 is returned when raiseException is false.

     Returns -1, 1 or 0 when the canonical form of urlA compares less
     than, greater than, or equal to that of urlB.
     """
     validatorA = UrlValidator()
     validatorB = UrlValidator()

     if not validatorA.validate(urlA):
         if not raiseException:
             return -1
         raise Exception('Invalid urlA')

     if not validatorB.validate(urlB):
         if not raiseException:
             return 1
         raise Exception('Invalid urlB')

     canonicalizerA = UrlCanonicalizer()
     canonicalizerB = UrlCanonicalizer()

     # the canonicalizers work from the validators, which were fed the
     # urls by validate() above
     canonicalA = canonicalizerA.canonicalizerValidator(validatorA)
     canonicalB = canonicalizerB.canonicalizerValidator(validatorB)

     if canonicalA < canonicalB:
         return -1
     return 1 if canonicalA > canonicalB else 0
Exemple #10
0
    def __getNormalizedUrl(self):
        """Return the canonical form of every entry in self.urls, in order.

        Entries that fail validation are represented by None so the result
        has exactly one element per url.
        """
        normalized = []
        # iterate over a copy of the list, as the original code did
        for candidate in self.urls[:]:
            validator = UrlValidator()
            if not validator.validate(candidate):
                normalized.append(None)
                continue
            canonicalizer = UrlCanonicalizer()
            normalized.append(canonicalizer.canonicalizerValidator(validator))

        return normalized
 def __getNormalizedUrl(self):
     """Return the canonical form of every entry in self.urls, in order.

     Entries that fail validation are represented by None so the result
     has exactly one element per url.
     """
     normalized = []
     # iterate over a copy of the list, as the original code did
     for candidate in self.urls[:]:
         validator = UrlValidator()
         if not validator.validate(candidate):
             normalized.append(None)
             continue
         canonicalizer = UrlCanonicalizer()
         normalized.append(canonicalizer.canonicalizerValidator(validator))

     return normalized
Exemple #12
0
class PathTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
        
    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['/urls?q=a',\
                '//urls//nba/com',\
                '/change%2bmy%2bmood',\
                '/change_my-mood',\
                '/../',\
                '/./',\
                '']
        expected = ['?q=a',\
                    '',\
                    '',\
                    '',\
                    '',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isPathValid(urls[i])
            self.assertEqual(expected[i], ret)
            
    # input: incorrect input
    # expected: None
    def test_illegalPath(self):
        urls = ['/change*my*mood',\
                '/change%xxmymood', \
                '/change%', \
                '/change./', \
                '/ha ha did this slip the test?']
        for url in urls:
            ret = self.urlValidator._isPathValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
            
    # input: no path
    # expected: original url 
    def test_noPath(self):
        urls = ['?q=a',\
                '']
        for url in urls:
            ret = self.urlValidator._isPathValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)          
Exemple #13
0
class PathTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['/urls?q=a',\
                '//urls//nba/com',\
                '/change%2bmy%2bmood',\
                '/change_my-mood',\
                '/../',\
                '/./',\
                '']
        expected = ['?q=a',\
                    '',\
                    '',\
                    '',\
                    '',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isPathValid(urls[i])
            self.assertEqual(expected[i], ret)

    # input: incorrect input
    # expected: None
    def test_illegalPath(self):
        urls = ['/change*my*mood',\
                '/change%xxmymood', \
                '/change%', \
                '/change./', \
                '/ha ha did this slip the test?']
        for url in urls:
            ret = self.urlValidator._isPathValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)

    # input: no path
    # expected: original url
    def test_noPath(self):
        urls = ['?q=a',\
                '']
        for url in urls:
            ret = self.urlValidator._isPathValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)
 def _removeWWWDot(self, url):
     """Drop a leading 'www.' from url when at least two dots are present.

     Urls that do not start with 'www.', or that contain fewer than two
     '.' occurrences (e.g. 'www.com'), are returned as-is.
     """
     if url.startswith('www.'):
         dotPositions = UrlValidator.getAllOccurance(url, '.')
         if len(dotPositions) >= 2:
             # skip the four characters of 'www.'
             return url[4:]

     return url[:]
    def compareNormalizeUrl(urlA, urlB, raiseException=True):
        """Order two urls by their canonicalized forms.

        Both urls are validated first. If urlA fails validation, an
        Exception ('Invalid urlA') is raised, or -1 is returned when
        raiseException is false. If urlB fails validation, an Exception
        ('Invalid urlB') is raised, or 1 is returned when raiseException
        is false.

        Returns -1, 1 or 0 when the canonical form of urlA compares less
        than, greater than, or equal to that of urlB.
        """
        validatorA = UrlValidator()
        validatorB = UrlValidator()

        if not validatorA.validate(urlA):
            if not raiseException:
                return -1
            raise Exception('Invalid urlA')

        if not validatorB.validate(urlB):
            if not raiseException:
                return 1
            raise Exception('Invalid urlB')

        canonicalizerA = UrlCanonicalizer()
        canonicalizerB = UrlCanonicalizer()

        # the canonicalizers work from the validators, which were fed the
        # urls by validate() above
        canonicalA = canonicalizerA.canonicalizerValidator(validatorA)
        canonicalB = canonicalizerB.canonicalizerValidator(validatorB)

        if canonicalA < canonicalB:
            return -1
        return 1 if canonicalA > canonicalB else 0
Exemple #16
0
class PortTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
        
    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = [':8000/urls',\
                ':65535',\
                ':0000000000000000000000000000000000000000000000065535',\
                '']
        expected = ['/urls',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isPortNumberValid(urls[i])
            self.assertEqual(expected[i], ret)
            
    # input: incorrect input
    # expected: None
    def test_illegalPort(self):
        urls = ['::',\
                ':abc', \
                ':-1',\
                ':',\
                ':65536',\
                ':065536']
        for url in urls:
            ret = self.urlValidator._isPortNumberValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
            
    # input: no port
    # expected: original url 
    def test_noPort(self):
        urls = ['/url/paths',\
                '']
        for url in urls:
            ret = self.urlValidator._isPortNumberValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)
Exemple #17
0
class QueryTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
        
    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['?q=a#fragment',\
                '?a=1&b=2;c=3',\
                '?',\
                '?#fragment']
        expected = ['#fragment',\
                    '',\
                    '',\
                    '#fragment']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isQueryValid(urls[i])
            self.assertEqual(expected[i], ret, 'wrong val at url: ' + urls[i])
            
    # input: incorrect input
    # expected: None
    def test_illegalPath(self):
        urls = ['?a', \
                '?1=2', \
                '?a=*', \
                '?a=1,b=2']
        for url in urls:
            ret = self.urlValidator._isQueryValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
            
    # input: no Query
    # expected: original url 
    def test_noQuery(self):
        urls = ['#fragment',\
                '']
        for url in urls:
            ret = self.urlValidator._isQueryValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)     
Exemple #18
0
class PortTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = [':8000/urls',\
                ':65535',\
                ':0000000000000000000000000000000000000000000000065535',\
                '']
        expected = ['/urls',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isPortNumberValid(urls[i])
            self.assertEqual(expected[i], ret)

    # input: incorrect input
    # expected: None
    def test_illegalPort(self):
        urls = ['::',\
                ':abc', \
                ':-1',\
                ':',\
                ':65536',\
                ':065536']
        for url in urls:
            ret = self.urlValidator._isPortNumberValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)

    # input: no port
    # expected: original url
    def test_noPort(self):
        urls = ['/url/paths',\
                '']
        for url in urls:
            ret = self.urlValidator._isPortNumberValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)
Exemple #19
0
class QueryTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['?q=a#fragment',\
                '?a=1&b=2;c=3',\
                '?',\
                '?#fragment']
        expected = ['#fragment',\
                    '',\
                    '',\
                    '#fragment']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isQueryValid(urls[i])
            self.assertEqual(expected[i], ret, 'wrong val at url: ' + urls[i])

    # input: incorrect input
    # expected: None
    def test_illegalPath(self):
        urls = ['?a', \
                '?1=2', \
                '?a=*', \
                '?a=1,b=2']
        for url in urls:
            ret = self.urlValidator._isQueryValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)

    # input: no Query
    # expected: original url
    def test_noQuery(self):
        urls = ['#fragment',\
                '']
        for url in urls:
            ret = self.urlValidator._isQueryValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)
Exemple #20
0
class FragmentTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()
        
        
    # input: correct input
    # expected: non-None
    def test_basicCorrectSituation(self):
        urls = ['#fragment',\
                '#_-_',\
                '#']
        expected = ['',\
                    '',\
                    '']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isFragmentValid(urls[i])
            self.assertEqual(expected[i], ret, 'wrong val at url: ' + urls[i])
            
    # input: incorrect input
    # expected: None
    def test_illegalPath(self):
        urls = ['##',\
                '#omg this is a fragment?', \
                '#www.google.com']
        for url in urls:
            ret = self.urlValidator._isFragmentValid(url)
            self.assertEqual(None, ret, 'wrong val at url: ' + url)
            
    # input: no Fragment
    # expected: original url 
    def test_noFragment(self):
        urls = ['']
        for url in urls:
            ret = self.urlValidator._isFragmentValid(url)
            self.assertEqual(url, ret, 'wrong val at url: ' + url)   
            
            
Exemple #21
0
class SchemeTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    # input: correct url
    # expected: non-None
    def test_basicCorrectSituation(self):
        valid_scheme_url = ['http://www.google.com', \
                            'https://www.google.com', \
                            'ftp://www.google.com']
        expected = ['www.google.com', \
                    'www.google.com', \
                    'www.google.com']
        for i in range(0, len(expected)):
            ret = self.urlValidator._isSchemeNameValid(valid_scheme_url[i])
            self.assertEqual(expected[i], ret)

    # input: incorrect url
    # expected: original url
    def test_wrongSchemeType(self):
        invalid_scheme_url = ['htp://www.google.com', \
                              'www://www.google.com', \
                              'ftp:/www.google.com', \
                              'http//www.google.com']
        for url in invalid_scheme_url:
            ret = self.urlValidator._isSchemeNameValid(url)
            self.assertEqual(url, ret)

# input: no scheme url
# expected: original url

    def test_inputWithoutSchemeType(self):
        no_scheme_url = ['', 'www.google.com']
        for url in no_scheme_url:
            ret = self.urlValidator._isSchemeNameValid(url)
            self.assertEqual(url, ret)
    def canonicalizeUrl(self, url):
        """Validate url and return its canonical form.

        Raises Exception('invalid url') when the url fails validation;
        otherwise delegates to canonicalizerValidator with the validator
        that processed the url.
        """
        validator = UrlValidator()
        if validator.validate(url):
            return self.canonicalizerValidator(validator)

        raise Exception('invalid url')
Exemple #23
0
class UrlValidatorTest(TestCase):
    # setup urlvalidator
    def setUp(self):
        self.urlValidator = UrlValidator()
    
    # wiki example validation, expect true
    def test_wikiexample(self):
        urls = ['http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations',\
                'http://en.wikipedia.org/wiki/Unit_testing#Language-']
        
        for url in urls:
            self.assertTrue(self.urlValidator.validate(url))
    
    # check param input
    def test_illegalinput(self):
        with self.assertRaises(AssertionError) as err:
            self.urlValidator.validate(None)
            
        with self.assertRaises(AssertionError) as err:
            self.urlValidator.validate(123)
            
    # check empty string
    def test_emptystring(self):
        self.assertFalse(self.urlValidator.validate(''))
    
    # scheme
    def test_correct_incorrect_scheme(self):
        correct_list = ['http://www.google.com', \
                        'ftp://www.google.com', \
                        'www.google.com']    
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url), 'fail at url = ' + url)
            
        incorrect_list = ['htp://www.google.com', \
                        '://www.google.com', \
                        'http:/www.google.com', \
                        'http//www.google.com']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url), 'fail at url = ' + url)
            
    # uname pword      
    def test_correct_incorrect_usernamepassword(self):
        correct_list = ['http://*****:*****@www.google.com', \
                        'http://www.google.com']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url), 'fail at url = ' + url)
            
        incorrect_list = ['http://[email protected]', \
                          'http://*****:*****@www.google.com', \
                          'http://@www.google.com', \
                          'http://@@www.google.com']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url), 'fail at url = ' + url)
    
    # dname
    def test_correct_incorrect_domainname(self):
        correct_list = ['http://google.com', \
                        'http://cs.washington.edu/path', \
                        'http://555.com']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url), 'fail at url = ' + url)
            
        incorrect_list = ['http://.com', \
                          'http://*****:*****@nba.com', \
                          'http://www.google.com/images%', \
                          'http://www.google.com/%2x/']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url)) 
            
    # query
    def test_correct_incorrect_query(self):
        correct_list = ['http://127.0.0.1:8000/url//nba/videos///?nba=cool', \
                        'http://127.0.0.1:8000/%2b?key=val1&key2=val2;key3=3#frag', \
                        'http://www.google.com:80/path/', \
                        'http://www.google.com:80/path/?', \
                        'http://www.google.com:80/path?']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url), 'fail at url = ' + url)  
            
        incorrect_list = ['http://www.google.com?nba', \
                          'http://www.google.com/??', \
                          'http://www.google.com/?cmm = cmm', \
                          'http://www.google.com/?key==value', \
                          'http://www.google.com/?1a=1b']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url)) 
            
    # fragment
    def test_correct_incorrect_fragment(self):
        correct_list = ['http://127.0.0.1:8000/url//nba/videos///?nba=cool#fragment', \
                        'http://127.0.0.1:8000/%2b?key=val1&key2=val2;key3=3#_-_', \
                        'http://127.0.0.1:8000/%2b?key=val1&key2=val2;key3=3#', \
                        'http://www.google.com:80/path/']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url), 'fail at url = ' + url)  
            
        incorrect_list = ['http://www.google.com?nba#wrong fragment', \
                          'http://www.google.com/##']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url)) 
Exemple #24
0
 def setUp(self):
     # build the validator first, then publish it on the test instance
     # for the test methods to use
     validator = UrlValidator()
     self.urlValidator = validator
Exemple #25
0
 # Read every line of infile, dropping one trailing newline from each.
 urls = []
 line = infile.readline()
 while len(line) > 0:
     # take out next line characters
     if line.endswith('\n'):
         line = line[:-1]
     urls.append(line)
     line = infile.readline()
 
 # filter out empty / whitespace-only strings; a list comprehension keeps
 # urls a real list (filter() returns an iterator on Python 3, which would
 # break the urls[:] slice below)
 urls = [s for s in urls if s.strip()]
 
 # process each url 
 for url in urls:
     # url valid
     uv = UrlValidator()
     isValid = uv.validate(url)
             
     # copy of the list with one occurrence of this url removed
     wo_url_in_urls = urls[:]
     wo_url_in_urls.remove(url)
     
     # initialize param; the normalized values stay None for invalid urls
     normURL = None
     isSrcUnique = UrlComparator.isSourceUnique(url, wo_url_in_urls)
     isNormUnique = None
     
     if isValid:
         uc = UrlCanonicalizer()
         normURL = uc.canonicalizerValidator(uv)
         isNormUnique = UrlComparator.isNormalizeUnique(url, wo_url_in_urls, False)
Exemple #26
0
 def setUp(self):
     # build the validator first, then publish it on the test instance
     # for the test methods to use
     validator = UrlValidator()
     self.urlValidator = validator
Exemple #27
0
class UrlValidatorTest(TestCase):
    def setUp(self):
        self.urlValidator = UrlValidator()

    def test_wikiexample(self):
        urls = ['http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations',\
                'http://en.wikipedia.org/wiki/Unit_testing#Language-']

        for url in urls:
            self.assertTrue(self.urlValidator.validate(url))

    # check param input
    def test_illegalinput(self):
        with self.assertRaises(AssertionError) as err:
            self.urlValidator.validate(None)

        with self.assertRaises(AssertionError) as err:
            self.urlValidator.validate(123)

    # check empty string
    def test_emptystring(self):
        self.assertFalse(self.urlValidator.validate(''))

    # scheme
    def test_correct_incorrect_scheme(self):
        correct_list = ['http://www.google.com', \
                        'ftp://www.google.com', \
                        'www.google.com']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url),
                            'fail at url = ' + url)

        incorrect_list = ['htp://www.google.com', \
                        '://www.google.com', \
                        'http:/www.google.com', \
                        'http//www.google.com']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url),
                             'fail at url = ' + url)

    # uname pword
    def test_correct_incorrect_usernamepassword(self):
        correct_list = ['http://*****:*****@www.google.com', \
                        'http://www.google.com']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url),
                            'fail at url = ' + url)

        incorrect_list = ['http://[email protected]', \
                          'http://*****:*****@www.google.com', \
                          'http://@www.google.com', \
                          'http://@@www.google.com']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url),
                             'fail at url = ' + url)

    # dname
    def test_correct_incorrect_domainname(self):
        correct_list = ['http://google.com', \
                        'http://cs.washington.edu/path', \
                        'http://555.com']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url),
                            'fail at url = ' + url)

        incorrect_list = ['http://.com', \
                          'http://*****:*****@nba.com', \
                          'http://www.google.com/images%', \
                          'http://www.google.com/%2x/']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url))

    # query
    def test_correct_incorrect_query(self):
        correct_list = ['http://127.0.0.1:8000/url//nba/videos///?nba=cool', \
                        'http://127.0.0.1:8000/%2b?key=val1&key2=val2;key3=3#frag', \
                        'http://www.google.com:80/path/', \
                        'http://www.google.com:80/path/?', \
                        'http://www.google.com:80/path?']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url),
                            'fail at url = ' + url)

        incorrect_list = ['http://www.google.com?nba', \
                          'http://www.google.com/??', \
                          'http://www.google.com/?cmm = cmm', \
                          'http://www.google.com/?key==value', \
                          'http://www.google.com/?1a=1b']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url))

    # fragment
    def test_correct_incorrect_fragment(self):
        correct_list = ['http://127.0.0.1:8000/url//nba/videos///?nba=cool#fragment', \
                        'http://127.0.0.1:8000/%2b?key=val1&key2=val2;key3=3#_-_', \
                        'http://127.0.0.1:8000/%2b?key=val1&key2=val2;key3=3#', \
                        'http://www.google.com:80/path/']
        for url in correct_list:
            self.assertTrue(self.urlValidator.validate(url),
                            'fail at url = ' + url)

        incorrect_list = ['http://www.google.com?nba#wrong fragment', \
                          'http://www.google.com/##']
        for url in incorrect_list:
            self.assertFalse(self.urlValidator.validate(url))