コード例 #1
0
ファイル: tests.py プロジェクト: hunlan/cse403sortingoption
 def test_normunique(self):
     """Exercise UrlComparator.isNormalizeUnique with three single-URL lists.

     The target URL is expected to be reported as not unique against an
     identical URL and against a URL that differs only in its fragment,
     and as unique against an unrelated URL.
     """
     target = 'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'

     # Each case pairs the assertion to apply with the list to compare against.
     cases = (
         # identical URL
         (self.assertFalse,
          ['http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations']),
         # same page, different fragment
         (self.assertFalse,
          ['http://en.wikipedia.org/wiki/Unit_testing#Language-']),
         # unrelated URL
         (self.assertTrue,
          ['wikipedia.org']),
     )

     for check, candidates in cases:
         check(UrlComparator.isNormalizeUnique(target, candidates))
コード例 #2
0
ファイル: tests.py プロジェクト: hunlan/mycsesortingoption
    def test_normunique(self):
        """Verify isNormalizeUnique for identical, fragment-variant and unrelated URLs.

        Only the unrelated URL list should leave the source URL unique.
        """
        source = 'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'

        # Exactly the same string as the source URL.
        identical = [
            'http://en.wikipedia.org/wiki/Unit_testing#Unit_testing_limitations'
        ]
        # Same page as the source URL; only the fragment differs.
        fragment_variant = ['http://en.wikipedia.org/wiki/Unit_testing#Language-']
        # A URL unrelated to the source.
        unrelated = ['wikipedia.org']

        is_unique = UrlComparator.isNormalizeUnique
        self.assertFalse(is_unique(source, identical))
        self.assertFalse(is_unique(source, fragment_variant))
        self.assertTrue(is_unique(source, unrelated))
コード例 #3
0
ファイル: checkURL.py プロジェクト: hunlan/mycsesortingoption
     # url valid
     uv = UrlValidator()
     isValid = uv.validate(url)
             
     # remove url in urls
     wo_url_in_urls = urls[:]
     wo_url_in_urls.remove(url)
     
     # initialize param
     normURL = None
     isSrcUnique = UrlComparator.isSourceUnique(url, wo_url_in_urls)
     isNormUnique = None
     
     if isValid:
         uc = UrlCanonicalizer()
         normURL = uc.canonicalizerValidator(uv)
         isNormUnique = UrlComparator.isNormalizeUnique(url, wo_url_in_urls, False)
         
     print 'Source: ' + url
     print 'Valid: ' + str(isValid)
     print 'Canonical: ' + ('None' if normURL == None else normURL)
     print 'Source unique: ' + str(isSrcUnique)
     print 'Canonicalized URL unique: ' + ('N/A' if isNormUnique == None else str(isNormUnique))
     
     print ''
 
 # clean up        
 infile.close()