threadData.shared.count += 1 status = u'访问了%d/%d个链接(%d%%)' % (threadData.shared.count, threadData.shared.length, round(100.0 * threadData.shared.count / threadData.shared.length)) dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True) threadData.shared.deeper = set() threadData.shared.unprocessed = set([target]) if not conf.sitemapUrl: message = u"你想检查网站是否存在站点地图sitemap(.xml)文件吗? [y/N] " if readInput(message, default='N', boolean=True): found = True items = None url = urlparse.urljoin(target, "/sitemap.xml") try: items = parseSitemap(url) except SqlmapConnectionException, ex: if "page not found" in getSafeExString(ex): found = False logger.warn(u"'sitemap.xml'未找到") except: pass finally: if found: if items: for item in items: if re.search(r"(.*?)\?(.+)", item): threadData.shared.value.add(item) if conf.crawlDepth > 1: threadData.shared.unprocessed.update(items) logger.info("%s links found" % ("no" if not items else len(items)))
def crawl(target):
    """Crawl the given target URL up to conf.crawlDepth levels, collecting
    candidate injection targets (links with GET parameters, and optionally
    forms) into kb.targets. Results are stored to file at the end."""
    try:
        # URLs already processed by any worker thread (shared, guarded by kb.locks.limit)
        visited = set()
        threadData = getCurrentThreadData()
        threadData.shared.value = OrderedSet()
        threadData.shared.formsFound = False

        def crawlThread():
            """Worker body executed by runThreads(); pops URLs from the shared
            queue, fetches each page and harvests further links from it."""
            threadData = getCurrentThreadData()

            while kb.threadContinue:
                # take the next unprocessed URL under the shared lock
                with kb.locks.limit:
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        if current in visited:
                            continue
                        elif conf.crawlExclude and re.search(conf.crawlExclude, current):
                            dbgMsg = "skipping '%s'" % current
                            logger.debug(dbgMsg)
                            continue
                        else:
                            visited.add(current)
                    else:
                        # queue drained for this depth level
                        break

                content = None
                try:
                    if current:
                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
                except SqlmapConnectionException as ex:
                    errMsg = "connection exception detected ('%s'). skipping " % getSafeExString(ex)
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)
                except SqlmapSyntaxException:
                    errMsg = "invalid URL detected. skipping '%s'" % current
                    logger.critical(errMsg)
                except _http_client.InvalidURL as ex:
                    errMsg = "invalid URL detected ('%s'). skipping " % getSafeExString(ex)
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)

                if not kb.threadContinue:
                    break

                # only text responses are parsed for links
                if isinstance(content, six.text_type):
                    try:
                        # trim anything outside the outermost <html>...</html>
                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
                        if match:
                            content = "<html>%s</html>" % match.group(1)

                        soup = BeautifulSoup(content)
                        tags = soup('a')

                        # augment anchor tags with raw href/src attributes and window.open() targets
                        tags += re.finditer(r'(?i)\s(href|src)=["\'](?P<href>[^>"\']+)', content)
                        tags += re.finditer(r'(?i)window\.open\(["\'](?P<href>[^)"\']+)["\']', content)

                        for tag in tags:
                            # BeautifulSoup tags expose get(); regex matches expose group()
                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")

                            if href:
                                # resolve relative links against the final (post-redirect) URL
                                if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID:
                                    current = threadData.lastRedirectURL[1]
                                url = _urllib.parse.urljoin(current, htmlUnescape(href))

                                # flag to know if we are dealing with the same target host
                                _ = checkSameHost(url, target)

                                if conf.scope:
                                    if not re.search(conf.scope, url, re.I):
                                        continue
                                elif not _:
                                    continue

                                # skip URLs whose extension is on the exclusion list
                                if (extractRegexResult(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower() not in CRAWL_EXCLUDE_EXTENSIONS:
                                    with kb.locks.value:
                                        threadData.shared.deeper.add(url)
                                        # only parameterized, non-static (js/css/version-tag) URLs become targets
                                        if re.search(r"(.*?)\?(.+)", url) and not re.search(r"\?(v=)?\d+\Z", url) and not re.search(r"(?i)\.(js|css)(\?|\Z)", url):
                                            threadData.shared.value.add(url)
                    except UnicodeEncodeError:  # for non-HTML files
                        pass
                    except ValueError:  # for non-valid links
                        pass
                    finally:
                        if conf.forms:
                            threadData.shared.formsFound |= len(findPageForms(content, current, False, True)) > 0

                if conf.verbose in (1, 2):
                    threadData.shared.count += 1
                    status = '%d/%d links visited (%d%%)' % (threadData.shared.count, threadData.shared.length, round(100.0 * threadData.shared.count / threadData.shared.length))
                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)

        threadData.shared.deeper = set()
        threadData.shared.unprocessed = set([target])

        # ask (once per run) whether sitemap.xml should be probed as a link seed
        if kb.checkSitemap is None:
            message = "do you want to check for the existence of "
            message += "site's sitemap(.xml) [y/N] "
            kb.checkSitemap = readInput(message, default='N', boolean=True)

        if kb.checkSitemap:
            found = True
            items = None
            url = _urllib.parse.urljoin(target, "/sitemap.xml")
            try:
                items = parseSitemap(url)
            except SqlmapConnectionException as ex:
                if "page not found" in getSafeExString(ex):
                    found = False
                    logger.warn("'sitemap.xml' not found")
            except:
                # best-effort: any other sitemap failure is silently ignored
                pass
            finally:
                if found:
                    if items:
                        for item in items:
                            # sitemap entries with a query string are direct target candidates
                            if re.search(r"(.*?)\?(.+)", item):
                                threadData.shared.value.add(item)
                        if conf.crawlDepth > 1:
                            threadData.shared.unprocessed.update(items)
                    logger.info("%s links found" % ("no" if not items else len(items)))

        if not conf.bulkFile:
            infoMsg = "starting crawler for target URL '%s'" % target
            logger.info(infoMsg)

        # breadth-first crawl, one thread pool run per depth level
        for i in xrange(conf.crawlDepth):
            threadData.shared.count = 0
            threadData.shared.length = len(threadData.shared.unprocessed)
            numThreads = min(conf.threads, len(threadData.shared.unprocessed))

            if not conf.bulkFile:
                logger.info("searching for links with depth %d" % (i + 1))

            runThreads(numThreads, crawlThread, threadChoice=(i > 0))
            clearConsoleLine(True)

            if threadData.shared.deeper:
                # links discovered at this level feed the next one
                threadData.shared.unprocessed = set(threadData.shared.deeper)
            else:
                break

    except KeyboardInterrupt:
        warnMsg = "user aborted during crawling. sqlmap "
        warnMsg += "will use partial list"
        logger.warn(warnMsg)

    finally:
        clearConsoleLine(True)

        if not threadData.shared.value:
            if not (conf.forms and threadData.shared.formsFound):
                warnMsg = "no usable links found (with GET parameters)"
                if conf.forms:
                    warnMsg += " or forms"
                logger.warn(warnMsg)
        else:
            for url in threadData.shared.value:
                kb.targets.add((urldecode(url, kb.pageEncoding), None, None, None, None))

        if kb.targets:
            # optional deduplication of targets sharing the same parameter signature
            if kb.normalizeCrawlingChoice is None:
                message = "do you want to normalize "
                message += "crawling results [Y/n] "
                kb.normalizeCrawlingChoice = readInput(message, default='Y', boolean=True)

            if kb.normalizeCrawlingChoice:
                seen = set()
                results = OrderedSet()

                for target in kb.targets:
                    if target[1] in (HTTPMETHOD.GET, None):
                        match = re.search(r"/[^/?]*\?.*\Z", target[0])
                        if match:
                            # key is path + parameter names with values stripped
                            key = re.sub(r"=[^=&]*", "=", match.group(0)).strip('&')
                            if key not in seen:
                                results.add(target)
                                seen.add(key)
                    else:
                        # non-GET targets are always kept
                        results.add(target)

                kb.targets = results

            storeResultsToFile(kb.targets)
status = '%d/%d links visited (%d%%)' % (threadData.shared.count, threadData.shared.length, round(100.0 * threadData.shared.count / threadData.shared.length)) dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True) threadData.shared.deeper = set() threadData.shared.unprocessed = set([target]) if not conf.sitemapUrl: message = "do you want to check for the existence of " message += "site's sitemap(.xml) [y/N] " if readInput(message, default='N', boolean=True): found = True items = None url = urlparse.urljoin(target, "/sitemap.xml") try: items = parseSitemap(url) except SqlmapConnectionException, ex: if "page not found" in getSafeExString(ex): found = False logger.warn("'sitemap.xml' not found") except: pass finally: if found: if items: for item in items: if re.search(r"(.*?)\?(.+)", item): threadData.shared.value.add(item) if conf.crawlDepth > 1: threadData.shared.unprocessed.update(items) logger.info("%s links found" % ("no" if not items else len(items)))
def crawl(target):
    """Legacy (Python 2 era) crawler variant: walk the target site up to
    conf.crawlDepth levels and add links carrying GET parameters to
    kb.targets, finally storing results to file."""
    try:
        # URLs already processed by any worker thread (shared, guarded by kb.locks.limit)
        visited = set()
        threadData = getCurrentThreadData()
        threadData.shared.value = oset()

        def crawlThread():
            """Worker body for runThreads(): fetches queued URLs and extracts links."""
            threadData = getCurrentThreadData()

            while kb.threadContinue:
                # dequeue next URL under the shared lock
                with kb.locks.limit:
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        if current in visited:
                            continue
                        elif conf.crawlExclude and re.search(conf.crawlExclude, current):
                            dbgMsg = "skipping '%s'" % current
                            logger.debug(dbgMsg)
                            continue
                        else:
                            visited.add(current)
                    else:
                        # queue drained for this depth level
                        break

                content = None
                try:
                    if current:
                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
                except SqlmapConnectionException as ex:
                    errMsg = "connection exception detected ('%s'). skipping " % getSafeExString(ex)
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)
                except SqlmapSyntaxException:
                    errMsg = "invalid URL detected. skipping '%s'" % current
                    logger.critical(errMsg)
                except httplib.InvalidURL as ex:
                    errMsg = "invalid URL detected ('%s'). skipping " % getSafeExString(ex)
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)

                if not kb.threadContinue:
                    break

                # only (Python 2) unicode responses are parsed for links
                if isinstance(content, unicode):
                    try:
                        # trim anything outside the outermost <html>...</html>
                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
                        if match:
                            content = "<html>%s</html>" % match.group(1)

                        soup = BeautifulSoup(content)
                        tags = soup('a')

                        # fall back to a raw regex scan when no anchor tags were parsed
                        if not tags:
                            tags = re.finditer(r'(?i)<a[^>]+href="(?P<href>[^>"]+)"', content)

                        for tag in tags:
                            # BeautifulSoup tags expose get(); regex matches expose group()
                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")

                            if href:
                                # resolve relative links against the final (post-redirect) URL
                                if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID:
                                    current = threadData.lastRedirectURL[1]
                                url = urlparse.urljoin(current, href)

                                # flag to know if we are dealing with the same target host
                                _ = checkSameHost(url, target)

                                if conf.scope:
                                    if not re.search(conf.scope, url, re.I):
                                        continue
                                elif not _:
                                    continue

                                # skip URLs whose trailing extension is on the exclusion list
                                if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
                                    with kb.locks.value:
                                        threadData.shared.deeper.add(url)
                                        # only parameterized URLs become injection candidates
                                        if re.search(r"(.*?)\?(.+)", url):
                                            threadData.shared.value.add(url)
                    except UnicodeEncodeError:  # for non-HTML files
                        pass
                    except ValueError:  # for non-valid links
                        pass
                    finally:
                        if conf.forms:
                            findPageForms(content, current, False, True)

                if conf.verbose in (1, 2):
                    threadData.shared.count += 1
                    status = '%d/%d links visited (%d%%)' % (threadData.shared.count, threadData.shared.length, round(100.0 * threadData.shared.count / threadData.shared.length))
                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)

        threadData.shared.deeper = set()
        threadData.shared.unprocessed = set([target])

        # optionally probe sitemap.xml as an extra link seed (skipped when an
        # explicit sitemap URL has already been provided)
        if not conf.sitemapUrl:
            message = "do you want to check for the existence of "
            message += "site's sitemap(.xml) [y/N] "

            if readInput(message, default='N', boolean=True):
                found = True
                items = None
                url = urlparse.urljoin(target, "/sitemap.xml")
                try:
                    items = parseSitemap(url)
                except SqlmapConnectionException as ex:
                    if "page not found" in getSafeExString(ex):
                        found = False
                        logger.warn("'sitemap.xml' not found")
                except:
                    # best-effort: any other sitemap failure is silently ignored
                    pass
                finally:
                    if found:
                        if items:
                            for item in items:
                                # sitemap entries with a query string are direct candidates
                                if re.search(r"(.*?)\?(.+)", item):
                                    threadData.shared.value.add(item)
                            if conf.crawlDepth > 1:
                                threadData.shared.unprocessed.update(items)
                        logger.info("%s links found" % ("no" if not items else len(items)))

        infoMsg = "starting crawler"
        if conf.bulkFile:
            infoMsg += " for target URL '%s'" % target
        logger.info(infoMsg)

        # breadth-first crawl, one thread pool run per depth level
        for i in xrange(conf.crawlDepth):
            threadData.shared.count = 0
            threadData.shared.length = len(threadData.shared.unprocessed)
            numThreads = min(conf.threads, len(threadData.shared.unprocessed))

            if not conf.bulkFile:
                logger.info("searching for links with depth %d" % (i + 1))

            runThreads(numThreads, crawlThread, threadChoice=(i > 0))
            clearConsoleLine(True)

            if threadData.shared.deeper:
                # links discovered at this level feed the next one
                threadData.shared.unprocessed = set(threadData.shared.deeper)
            else:
                break

    except KeyboardInterrupt:
        warnMsg = "user aborted during crawling. sqlmap "
        warnMsg += "will use partial list"
        logger.warn(warnMsg)

    finally:
        clearConsoleLine(True)

        if not threadData.shared.value:
            warnMsg = "no usable links found (with GET parameters)"
            logger.warn(warnMsg)
        else:
            for url in threadData.shared.value:
                kb.targets.add((urldecode(url, kb.pageEncoding), None, None, None, None))

        storeResultsToFile(kb.targets)