def response(url, response):
    """Log that a response has been received for *url*.

    Assigned to ``spider.responseCallback`` in the script section below.

    Args:
        url: The URL that was contacted.
        response: The response object handed over by the spider. It is not
            used here; note that the parameter shadows this function's own
            name inside the body.
    """
    # Pass the value as a logging argument instead of pre-formatting with
    # ``%``: formatting is deferred until the record is emitted, and a
    # tuple-valued ``url`` can no longer break the ``%`` operator.
    log.info("Processing %s", url)


def htmlCallback(url, html):
    """Log the HTML found in a response.

    Assigned to ``spider.htmlCallback`` in the script section below.

    Args:
        url: The URL the HTML belongs to (unused here).
        html: The HTML content; logged as-is at INFO level.
    """
    log.info(html)


def linkCallback(parentUrl, linkUrl, element):
    """Log a link found in the page being spidered.

    Assigned to ``spider.linkCallback`` in the script section below.

    Args:
        parentUrl: URL of the page on which the link was found.
        linkUrl: Target URL of the link.
        element: The element carrying the link; rendered with ``%s``, which
            applies ``str()`` to it.
    """
    log.info("Link Found: %s %s %s", parentUrl, linkUrl, element)


def levelCallback(levelNumber):
    """Log that a new spidering level is being started.

    Assigned to ``spider.levelCallback`` in the script section below.

    Args:
        levelNumber: Number of the level being started; formatted with
            ``%d``, so it is expected to be numeric.
    """
    log.info("Spidering level %d", levelNumber)


if __name__ == '__main__':
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(threadName)s: %(message)s",
    )

    # NOTE(review): ``args`` is not defined in this section; presumably it
    # holds parsed command-line options created elsewhere in the file --
    # confirm before running.
    spider = Spider(args.url, threads=args.threads, maxDepth=args.depth)

    # Wire the logging callbacks defined above into the spider.
    spider.responseCallback = response
    spider.htmlCallback = htmlCallback
    spider.linkCallback = linkCallback
    spider.levelCallback = levelCallback

    spider.spider()