Example #1
0
 def test_invalid(self):
     """Check that validator.is_valid() is falsy for every URL listed below.

     All offending URLs are gathered before asserting, so a failure names
     each of them instead of stopping at the first one with a bare
     "True is not false".
     """
     # NOTE(review): the '[email protected]' entries look like userinfo that
     # was redacted by a scraper -- confirm against the original fixture.
     urls = [
         '://stuff.com',
         '//stuff.com',
         'stuff.com',
         'www.stuff.com',
         'http://',
         'http:',
         'http',
         'ftp://[email protected]',
         'ftp://[email protected]/path?q#frag',
         'ftp://ftp.epcc.ed.ac.uk ',  # trailing space is intentional
         'http://a',
         'http://a.',
         'http://a.b',
         'http://a:a.b',
         'http://a/b.c/ef.gh',
         'http://999.999.999.999:123ab',
         'http://999.999.999.999:-12',
         # first label is 64 characters long
         'http://1234567890123456789012345678901234567890123456789012345678901234.com',
     ]
     wrongly_accepted = [url for url in urls if validator.is_valid(url)]
     self.assertEqual(
         [], wrongly_accepted,
         'URLs wrongly reported as valid: %r' % (wrongly_accepted,))
Example #2
0
    parser.add_argument('-c', '--comparator', choices=comparators.keys(),
                        help='comparison function to define URL uniqueness (DEFAULT=alpha)')
    args = parser.parse_args()
    
    # read URLs from file
    urls = None
    try:
        urls = open(args.input).readlines()
        urls = [x.strip() for x in urls]
    except IOError:
        print "Error: File \"%s\" not found." % args.input
        sys.exit()

    # default comparator is alpha
    cmp = comparators['alpha']
    if args.comparator is not None:
        cmp = comparators[args.comparator]
        
    # set of urls, normalized
    normUrls = normalizer.normalize_list(urls)
    
    # print results
    for i, url in enumerate(urls):
        normUrl = normUrls[i]
        print 'Source:               ', url
        print 'Valid:                ', validator.is_valid(url)
        print 'Canonicalized:        ', normUrl
        print 'Source unique:        ', is_unique(url, urls, cmp)
        print 'Canonicalized unique: ', is_unique(normUrl, normUrls, cmp)
        print
        
Example #3
0
 def test_valid(self):
     """Check that validator.is_valid() is truthy for every URL listed below.

     All offending URLs are gathered before asserting, so a failure names
     each of them instead of stopping at the first one with a bare
     "False is not true".
     """
     urls = [
         'https://stuff.com',
         'http://stuff.com',
         'ftp://stuff.com',
         'ftps://stuff.com',
         'http://stuff.com/path//////',
         'http://stuff.com/path.more/file.ext',
         'http://stuff.com/path?q=wat+stuff',
         'http://stuff.com/path#frag',
         'http://stuff.com/path?q#frag',
         'ftp://ftp.epcc.ed.ac',
         'http://a.bc',
         'http://a.bcdefg',
         'http://a.bcde-fgh',
         'http://a.b.c.ef',
         'http://2001.0db8.85a3.0000.0000.8a2e.0370.7334',
         'http://zzzz.gggg.eeee.9999.1234.mmmm.aaaa.wwww',
         'http://123.1.2.3',
         'http://999.999.999.999',
         'http://999.999.999.999:99999',
         'http://localhost',
         # first label is 63 characters long
         'http://123456789012345678901234567890123456789012345678901234567890123.com',
     ]
     wrongly_rejected = [url for url in urls if not validator.is_valid(url)]
     self.assertEqual(
         [], wrongly_rejected,
         'URLs wrongly reported as invalid: %r' % (wrongly_rejected,))