# Example #1
# 0
def response(url, response):
    """Handle a response received by the spider after contacting a URL.

    Only the URL is logged (at INFO level); the response object itself is
    not inspected here.

    Args:
        url: The URL that was contacted.
        response: The response received for ``url`` (unused).
    """
    # Hand the URL to the logger as an argument instead of pre-formatting
    # with ``%``: formatting is then skipped when INFO is disabled, and a
    # tuple-valued ``url`` can no longer break the ``%`` operator.
    log.info("Processing %s", url)

def htmlCallback(url, html):
    """Log the HTML that was found in a response.

    Args:
        url: URL the HTML belongs to (not used here).
        html: The HTML content; it is logged as-is at INFO level.
    """
    markup = html
    log.info(markup)

def linkCallback(parentUrl, linkUrl, element):
    """Log a link that has been found in the page being spidered.

    Args:
        parentUrl: URL logged first; presumably the page the link was
            found on (confirm against the Spider implementation).
        linkUrl: URL logged second; presumably the link's target.
        element: Object logged last; it is rendered with ``%s``, i.e. via
            its string form.
    """
    # Deferred formatting: the logger applies ``%s`` (and therefore
    # ``str(element)``) only if the record is actually emitted, so no
    # explicit ``str()`` call or eager ``%`` formatting is needed.
    log.info("Link Found: %s %s %s", parentUrl, linkUrl, element)

def levelCallback(levelNumber):
    """Log that a new level of the page is being started.

    Args:
        levelNumber: Number of the level being started; it is rendered
            with ``%d``, so it must be numeric.
    """
    # Let the logging framework do the ``%d`` substitution so the message
    # is only built when INFO records are actually emitted.
    log.info("Spidering level %d", levelNumber)

if __name__ == '__main__':
    # Every log line carries a timestamp, the severity and the name of the
    # emitting thread (the spider is started with a thread count below).
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(threadName)s: %(message)s",
    )

    # NOTE(review): ``args`` is not defined in the visible part of this
    # file; presumably it is the parsed command-line namespace -- confirm.
    spider = Spider(
        args.url,
        threads=args.threads,
        maxDepth=args.depth,
    )

    # Register the logging callbacks defined above, then start the crawl.
    spider.responseCallback = response
    spider.htmlCallback = htmlCallback
    spider.linkCallback = linkCallback
    spider.levelCallback = levelCallback
    spider.spider()