Esempio n. 1
0
if __name__ == "__main__":
    import sys
    print sys.argv
    rootURL = ''
    if len(sys.argv) > 1:
        rootURL = sys.argv[1]
    else:
        rootURL = 'vatgia.com'
    print 'Crawl ', rootURL

    from urlmanager import URLManager
    from urlrequest import URLRequest
    from storage import MySQLDBStorage
    from htmlprocessor import VGHMTLProcessor

    storage = MySQLDBStorage('localhost', 'root', 'adminpass', 'nc_vgdb')
    urlManager = URLManager(storage)
    processor = VGHMTLProcessor(storage)
    urlManager.addURL('http://{0}'.format(rootURL))
    urlManager.addURL('http://{0}/home'.format(rootURL))
    urlManager.addURL('http://{0}/home/'.format(rootURL))

    urlRequest = URLRequest('testlink.txt')
    while 1:
        nextURL = urlManager.nextURL()
        if not nextURL:
            break
        urlRequest.execute(nextURL, urlManager, processor)