Esempio n. 1
0
    parser.add_argument('-c', '--comparator', choices=comparators.keys(),
                        help='comparison function to define URL uniqueness (DEFAULT=alpha)')
    args = parser.parse_args()

    # Read the URLs, one per line, stripping leading/trailing whitespace.
    # The ``with`` block closes the input file as soon as it has been read
    # instead of leaving the handle open until garbage collection.
    try:
        with open(args.input) as infile:
            urls = [line.strip() for line in infile]
    except IOError:
        print "Error: File \"%s\" not found." % args.input
        # Non-zero status so a calling shell/script can detect the failure.
        sys.exit(1)

    # default comparator is alpha; a command-line choice overrides it
    cmp = comparators['alpha']
    if args.comparator is not None:
        cmp = comparators[args.comparator]

    # Normalized form of every URL; indexed in parallel with ``urls`` below.
    normUrls = normalizer.normalize_list(urls)

    # Print one report per source URL: validity, canonical form, and whether
    # the source / canonical form is unique under the chosen comparator.
    for i, url in enumerate(urls):
        normUrl = normUrls[i]
        print 'Source:               ', url
        print 'Valid:                ', validator.is_valid(url)
        print 'Canonicalized:        ', normUrl
        print 'Source unique:        ', is_unique(url, urls, cmp)
        print 'Canonicalized unique: ', is_unique(normUrl, normUrls, cmp)
        print
        
Esempio n. 2
0
    try:
        outfile = open(args.output, 'w')
    except IOError:
        print "Error: Unable to open output file \"%s\"." % args.output
        sys.exit()

    try:
        urls = open(args.input).readlines()
    except IOError:
        print "Error: File \"%s\" not found." % args.input
        sys.exit()

    sel = 3  # default selection is quicksort
    if args.sort is not None: # try getting sort selection from command-line args
        sel = args.sort

    # remove leading/trailing whitespace from urls
    urls = [x.strip() for x in urls]

    # normalize and validate urls, if specified
    if args.filter is not None:
        validUrls = validator.valid_list(urls)
        if args.filter == 'valid':
            urls = normalizer.normalize_list(validUrls)
        elif args.filter == 'invalid':
            urls = filter(lambda x: x not in validUrls, urls)

    sorter = algos[sel](urls)
    sortedList = sorter.sort()
    outfile.write("\n".join(sortedList))