class URIBruter(object):
    '''
    URI bruteforcer.

    Generates dictionaries of candidate paths (web backups, config
    backups, interesting files, web consoles) and probes a target URL
    for their existence.

    @remarks:
        allowTypes: supported dictionary types,
            ["webbak","cfgbak","interestfile","webconsole"]
    '''

    allowTypes = ["webbak","cfgbak","interestfile","webconsole"]
    _dirInfoFile = os.path.join(sys.path[0],"script","data","uri_brute.yaml")
    _javaConsoleFile = os.path.join(sys.path[0],"script","data","java_webconsole.yaml")

    def __init__(self, types, keywords=None, exts=None, size="small", url=None):
        '''
        @params:
            types: list of dictionary types to generate (see allowTypes)
            keywords: keyword list used to generate backup file names
            exts: extension list automatically appended to generated entries
            size: "small" skips the *_dir prefixes to shrink the dictionary
            url: unused, kept for backward compatibility
        '''
        self.types = types
        # copy the list: avoids both the shared mutable-default-argument
        # pitfall and mutating the caller's list (bruteforce appends to it)
        self.keywords = list(keywords) if keywords else []
        self.exts = exts if exts else ["php"]
        self.size = size
        self.dirInfo = self._loadDirInfo()
        self.log = Log("uribrute")

    def _getKeywordFromURL(self, url):
        '''
        Extract a keyword from an URL, e.g. "xxx" from "xxx.com"; the
        keyword is used to generate web backup file names. Returns None
        for IP-based URLs or URLs without a host.
        '''
        host = urlparse.urlparse(url)[1]
        if not host:
            return None
        if URL.isIP(url):
            return None
        hostsp = host.split(".")
        try:
            if host.startswith("www."):
                keyword = hostsp[1]
            else:
                keyword = hostsp[0]
        except IndexError:
            return None
        return keyword

    def _genKeywordWebbakDict(self):
        '''Generate web backup file names from the user supplied keywords.'''
        suffixList = self.dirInfo['web_bak_file']
        result = []
        for suffix in suffixList:
            for keyword in self.keywords:
                result.append("".join([keyword,suffix]))
                result.append("-".join([keyword,suffix]))
                result.append("_".join([keyword,suffix]))
        return [unicode(x) for x in self.keywords] + result

    def _loadJavaConsoleDict(self):
        '''Load HTTP java web-console URLs (except "/") from the yaml data file.'''
        result = []
        javaConsoleInfo = YamlConf(self._javaConsoleFile)
        for server, consoles in javaConsoleInfo.iteritems():
            for console in consoles:
                if console['type'] == "http" and console['url'] != "/":
                    result.append(console['url'])
        return result

    def _loadDirInfo(self):
        '''
        Load the uri_brute.yaml data file, expand the '<ext>' placeholder
        with every configured extension and return the dirInfo dict.
        '''
        result = {}
        dirInfo = YamlConf(self._dirInfoFile)
        for key, value in dirInfo.iteritems():
            result[key] = []
            for line in value:
                if "<ext>" in line:
                    for ext in self.exts:
                        result[key].append(line.replace("<ext>", ext))
                else:
                    result[key].append(line)
        return result

    def _dictIter(self):
        '''Yield dictionary entries for the configured types.'''
        if "webbak" in self.types:
            if self.keywords:
                self.dirInfo['web_bak_file'] += self._genKeywordWebbakDict()
            if self.size == "small":
                # small dictionaries skip the directory prefixes
                self.dirInfo['web_bak_dir'] = []
            for zdir in [""] + self.dirInfo['web_bak_dir']:
                for zfile in self.dirInfo['web_bak_file']:
                    for ext in self.dirInfo['web_bak_ext']:
                        if zdir:
                            yield "/"+zdir+"/"+zfile+ext
                        else:
                            yield "/"+zfile+ext
        if "cfgbak" in self.types:
            if self.size == "small":
                self.dirInfo['cfg_bak_dir'] = []
            for bdir in [""] + self.dirInfo['cfg_bak_dir']:
                for bfile in self.dirInfo['cfg_bak_file']:
                    for ext in self.dirInfo['cfg_bak_ext']:
                        if bdir:
                            yield "/"+bdir+"/"+bfile+ext
                        else:
                            yield "/"+bfile+ext
        if "webconsole" in self.types:
            for cdir in [""] + self.dirInfo['web_console_dir']:
                for cfile in self.dirInfo['web_console_file']:
                    if cdir:
                        yield "/"+cdir+cfile
                    else:
                        yield "/"+cfile
        if "interestfile" in self.types:
            for line in self.dirInfo['interest_file']:
                yield "/"+line
        if "jsp" in self.exts:
            for line in self._loadJavaConsoleDict():
                yield line

    def genDict(self, url=None):
        '''
        Generate the dictionary as a list. If url is given a keyword is
        extracted from it and added to self.keywords first.
        '''
        if url:
            keyword = self._getKeywordFromURL(url)
            if keyword:
                self.keywords.append(keyword)
        return [line for line in self._dictIter()]

    def _safeRequest(self, safeURL):
        '''Issue a harmless request, used to evade WAF-like devices.'''
        if not safeURL:
            return
        try:
            http.get(safeURL)
        except http.ConnectionError:
            # best effort only
            pass

    def bruteforce(self, baseURL, notFoundPattern=None, safeURL=None, timeout=10, delay=0):
        '''
        Bruteforce baseURL with the generated dictionary.

        @params:
            notFoundPattern: pattern identifying soft-404 pages
            safeURL: URL requested before each probe (WAF evasion)
            timeout: per-request timeout in seconds
            delay: seconds to sleep between probes
        @returns:
            list of URLs that answered 200 without the notFoundPattern
        '''
        baseURL = URL.getURI(baseURL)
        keyword = self._getKeywordFromURL(baseURL)
        if keyword:
            self.keywords.append(keyword)
        matchs = []
        baseURL = baseURL.rstrip("/")
        for line in self._dictIter():
            time.sleep(delay)
            self._safeRequest(safeURL)
            url = baseURL + line
            try:
                self.log.debug(u"request url '{0}'".format(url))
                # redirects disabled: a 30x answer is treated as a miss
                response = http.get(url, timeout=timeout, allow_redirects=False)
            except http.ConnectionError:
                continue
            if response.status_code != 200:
                continue
            # fix: hits were only logged/recorded in the branch where
            # notFoundPattern was unset; record every real 200 hit
            if notFoundPattern and notFoundPattern in response.content:
                continue
            self.log.debug(u"find available url '{0}'".format(url))
            matchs.append(url)
        return matchs
class CMSIdentify(object):
    '''
    CMS identification (yaml fingerprint database, conf['ptdpath'] based).
    '''

    _fingerprintFile = os.path.join(conf['ptdpath'], "cms_fingerprint.yaml")

    def __init__(self, baseURL, notFoundPattern=None):
        '''
        @params:
            baseURL: URL of the site to identify
            notFoundPattern: soft-404 pattern; some sites always answer
                301/200, this pattern tells real hits apart
        '''
        baseURL = URL.getURI(baseURL)
        self.baseURL = baseURL.rstrip("/")
        self.notFoundPattern = notFoundPattern
        self.fp = YamlConf(self._fingerprintFile)
        self.log = Log("cmsidentify")

    def _checkPath(self, path, pattern):
        '''
        Request baseURL+path and report whether it matches: the status
        must be 200, the body must not contain notFoundPattern and, when
        a pattern is given, must contain it.
        '''
        url = self.baseURL + path
        try:
            # redirects disabled: a 30x answer is treated as a miss
            response = http.get(url, allow_redirects=False)
        except http.ConnectionError:
            self.log.debug(
                "Checking '{0}' failed, connection failed".format(url))
            return False
        if response.status_code != 200:
            self.log.debug("Checking '{0}' failed, status code {1}".format(
                url, response.status_code))
            return False
        if self.notFoundPattern and self.notFoundPattern in response.content:
            self.log.debug(
                "Checking '{0}' failed, notFoundPattern matchs.".format(url))
            return False
        if not pattern:
            self.log.debug(
                "Checking '{0}' success, status code 200.".format(url))
            return True
        if pattern in response.text:
            self.log.debug(
                "Checking '{0}' success, status code 200, match pattern {1}."
                .format(url, pattern))
            return True
        self.log.debug(
            "Checking '{0}' failed, pattern not found.".format(url))
        return False

    def _checkCMS(self, cmstype, cmsfp):
        '''
        Check one CMS fingerprint: every 'need' entry must match; the
        result is the list of matched optional entries, or False when a
        required entry failed or nothing optional matched.
        '''
        matchList = []
        for line in cmsfp:
            if line['need']:
                if not self._checkPath(line['path'], line['pattern']):
                    return False
            else:
                if self._checkPath(line['path'], line['pattern']):
                    matchList.append([line['path'], line['pattern']])
        return matchList if matchList else False

    def identify(self):
        '''
        Identify the CMS.

        @returns:
            (cmstype, matchs): matchs is empty when identification
            failed; cmstype is None when the fingerprint db is empty.
        '''
        # fix: cmstype was unbound (NameError) when self.fp is empty
        cmstype = None
        for cmstype, cmsfp in self.fp.iteritems():
            self.log.debug("Verify {0}".format(cmstype))
            matchs = self._checkCMS(cmstype, cmsfp)
            if matchs:
                return (cmstype, matchs)
        return (cmstype, [])
class CMSIdentify(object):
    '''
    CMS identification (yaml fingerprint database, sys.path[0] based).
    '''

    _fingerprintFile = os.path.join(sys.path[0],"script","data","cms_fingerprint.yaml")

    def __init__(self, baseURL, notFoundPattern=None):
        '''
        @params:
            baseURL: URL of the site to identify
            notFoundPattern: soft-404 pattern; some sites always answer
                301/200, this pattern tells real hits apart
        '''
        baseURL = URL.getURI(baseURL)
        self.baseURL = baseURL.rstrip("/")
        self.notFoundPattern = notFoundPattern
        self.fp = YamlConf(self._fingerprintFile)
        self.log = Log("cmsidentify")

    def _checkPath(self, path, pattern):
        '''
        Request baseURL+path and report whether it matches: the status
        must be 200, the body must not contain notFoundPattern and, when
        a pattern is given, must contain it.
        '''
        url = self.baseURL + path
        try:
            # redirects disabled: a 30x answer is treated as a miss
            response = http.get(url, allow_redirects=False)
        except http.ConnectionError:
            self.log.debug("Checking '{0}' failed, connection failed".format(url))
            return False
        if response.status_code != 200:
            self.log.debug("Checking '{0}' failed, status code {1}".format(url, response.status_code))
            return False
        if self.notFoundPattern and self.notFoundPattern in response.content:
            self.log.debug("Checking '{0}' failed, notFoundPattern matchs.".format(url))
            return False
        if not pattern:
            self.log.debug("Checking '{0}' success, status code 200.".format(url))
            return True
        if pattern in response.text:
            self.log.debug("Checking '{0}' success, status code 200, match pattern {1}.".format(url,pattern))
            return True
        self.log.debug("Checking '{0}' failed, pattern not found.".format(url))
        return False

    def _checkCMS(self, cmstype, cmsfp):
        '''
        Check one CMS fingerprint: every 'need' entry must match; the
        result is the list of matched optional entries, or False when a
        required entry failed or nothing optional matched.
        '''
        matchList = []
        for line in cmsfp:
            if line['need']:
                if not self._checkPath(line['path'], line['pattern']):
                    return False
            else:
                if self._checkPath(line['path'], line['pattern']):
                    matchList.append([line['path'], line['pattern']])
        return matchList if matchList else False

    def identify(self):
        '''
        Identify the CMS.

        @returns:
            (cmstype, matchs): matchs is empty when identification
            failed; cmstype is None when the fingerprint db is empty.
        '''
        # fix: cmstype was unbound (NameError) when self.fp is empty
        cmstype = None
        for cmstype, cmsfp in self.fp.iteritems():
            self.log.debug("Verify {0}".format(cmstype))
            matchs = self._checkCMS(cmstype, cmsfp)
            if matchs:
                return (cmstype, matchs)
        return (cmstype, [])
class CMSIdentify(object):
    '''
    Identify the CMS type (json fingerprint database).

    Input:
        baseURL: base url to test.
        notFoundPattern: soft-404 pattern; some sites always answer
            301/200, this pattern tells real hits apart.
    '''

    fingerprintFile = os.path.join(sys.path[0], "directory", "cms", "cms_fingerprint.json")

    def __init__(self, baseURL, notFoundPattern=None):
        if not baseURL.startswith("http"):
            raise PenError(
                "CMSIdentify, baseURL format error, not startswith 'http(s)'.")
        self.baseURL = baseURL.rstrip("/")
        self.notFoundPattern = notFoundPattern
        # fix: close the file handle instead of leaking it
        with open(self.fingerprintFile, "r") as fd:
            self.fp = json.load(fd)
        self.log = Log("cmsidentify")

    def checkPath(self, path, pattern):
        '''
        Request baseURL+path and report whether it matches: the status
        must be 200, neither the body nor the first redirect in history
        may contain notFoundPattern and, when a pattern is given, the
        body must contain it.
        '''
        url = self.baseURL + path
        try:
            response = http.get(url)
        except http.RequestException:
            self.log.debug(
                "Checking '{0}' failed, connection failed".format(url))
            return False
        if response.status_code != 200:
            self.log.debug("Checking '{0}' failed, status code {1}".format(
                url, response.status_code))
            return False
        if self.notFoundPattern:
            if self.notFoundPattern in response.content:
                self.log.debug(
                    "Checking '{0}' failed, notFoundPattern matchs.".format(url))
                return False
            # a redirected request may carry the soft-404 in its history
            if response.history and self.notFoundPattern in response.history[0].content:
                self.log.debug(
                    "Checking '{0}' failed, notFoundPattern matchs.".format(url))
                return False
        if not pattern:
            self.log.debug(
                "Checking '{0}' success, status code 200.".format(url))
            return True
        if pattern in response.text:
            self.log.debug(
                "Checking '{0}' success, status code 200, match pattern {1}."
                .format(url, pattern))
            return True
        self.log.debug(
            "Checking '{0}' failed, pattern not found.".format(url))
        return False

    def checkCMS(self, cmstype, cmsfp):
        '''
        Check one CMS fingerprint: every 'need' entry must match; the
        result is the list of matched optional entries, or False when a
        required entry failed or nothing optional matched.
        '''
        matchList = []
        for line in cmsfp:
            if line['need']:
                if not self.checkPath(line['path'], line['pattern']):
                    return False
            else:
                if self.checkPath(line['path'], line['pattern']):
                    matchList.append([line['path'], line['pattern']])
        return matchList if matchList else False

    def identify(self):
        '''
        Identify the CMS.

        Output:
            (cmstype, matchs): matchs is empty when identification
            failed; cmstype is None when the fingerprint db is empty.
        '''
        # fix: cmstype was unbound (NameError) when self.fp is empty
        cmstype = None
        for cmstype, cmsfp in self.fp.iteritems():
            self.log.debug("Verify {0}".format(cmstype))
            matchs = self.checkCMS(cmstype, cmsfp)
            if matchs:
                return (cmstype, matchs)
        return (cmstype, [])
class CMSIdentify(object):
    '''
    Identify the CMS type (json fingerprint database, compact variant).

    Input:
        baseURL: base url to test.
        notFoundPattern: soft-404 pattern; some sites always answer
            301/200, this pattern tells real hits apart.
    '''

    fingerprintFile = os.path.join(sys.path[0],"directory","cms","cms_fingerprint.json")

    def __init__(self, baseURL, notFoundPattern=None):
        if not baseURL.startswith("http"):
            raise PenError("CMSIdentify, baseURL format error, not startswith 'http(s)'.")
        self.baseURL = baseURL.rstrip("/")
        self.notFoundPattern = notFoundPattern
        # fix: close the file handle instead of leaking it
        with open(self.fingerprintFile, "r") as fd:
            self.fp = json.load(fd)
        self.log = Log("cmsidentify")

    def checkPath(self, path, pattern):
        '''
        Request baseURL+path and report whether it matches: the status
        must be 200, neither the body nor the first redirect in history
        may contain notFoundPattern and, when a pattern is given, the
        body must contain it.
        '''
        url = self.baseURL + path
        try:
            response = http.get(url)
        except http.RequestException:
            self.log.debug("Checking '{0}' failed, connection failed".format(url))
            return False
        if response.status_code != 200:
            self.log.debug("Checking '{0}' failed, status code {1}".format(url, response.status_code))
            return False
        if self.notFoundPattern:
            if self.notFoundPattern in response.content:
                self.log.debug("Checking '{0}' failed, notFoundPattern matchs.".format(url))
                return False
            # a redirected request may carry the soft-404 in its history
            if response.history and self.notFoundPattern in response.history[0].content:
                self.log.debug("Checking '{0}' failed, notFoundPattern matchs.".format(url))
                return False
        if not pattern:
            self.log.debug("Checking '{0}' success, status code 200.".format(url))
            return True
        if pattern in response.text:
            self.log.debug("Checking '{0}' success, status code 200, match pattern {1}.".format(url,pattern))
            return True
        self.log.debug("Checking '{0}' failed, pattern not found.".format(url))
        return False

    def checkCMS(self, cmstype, cmsfp):
        '''
        Check one CMS fingerprint: every 'need' entry must match; the
        result is the list of matched optional entries, or False when a
        required entry failed or nothing optional matched.
        '''
        matchList = []
        for line in cmsfp:
            if line['need']:
                if not self.checkPath(line['path'], line['pattern']):
                    return False
            else:
                if self.checkPath(line['path'], line['pattern']):
                    matchList.append([line['path'], line['pattern']])
        return matchList if matchList else False

    def identify(self):
        '''
        Identify the CMS.

        Output:
            (cmstype, matchs): matchs is empty when identification
            failed; cmstype is None when the fingerprint db is empty.
        '''
        # fix: cmstype was unbound (NameError) when self.fp is empty
        cmstype = None
        for cmstype, cmsfp in self.fp.iteritems():
            self.log.debug("Verify {0}".format(cmstype))
            matchs = self.checkCMS(cmstype, cmsfp)
            if matchs:
                return (cmstype, matchs)
        return (cmstype, [])
class Service(object):
    '''
    Service identification.

    Fetches the target once, stores page meta information (url, status
    code, headers, html, title, robots) and matches it against the
    app_fingerprint.yaml fingerprint database.
    '''

    def __init__(self, url, notFoundPattern=None, cmsEnhance=False):
        '''
        @params:
            url: target URL
            notFoundPattern: soft-404 pattern, used by the 'requests' matcher
            cmsEnhance: enable the request-based 'requests' matcher
        '''
        self._url = url.strip()
        self._notFoundPattern = notFoundPattern
        self._cmsEnhance = cmsEnhance
        if not URL.check(self._url):
            raise PenError("Service Identify, URL format error")
        self._target = URL.format(self._url)
        self._fp = YamlConf(os.path.join(sys.path[0],"script","data","app_fingerprint.yaml"))
        # fix: removed leftover debug code that did an unguarded
        # self._fp['Applications']['Drupal'] lookup (possible KeyError)
        # metaInfo: page meta information
        # url, statusCode, headers, html, title, robots
        self._metaInfo = {}
        self._initMetaInfo()
        self._result = MatchsInfo(self._fp)
        self._matchFuncs = {}
        self._initHandleFuncs()
        self._log = Log("service_identify")

    def _getTitle(self, html):
        '''Return the page title, or "blank" when no <title> is present.'''
        tree = etree.HTML(html)
        titles = tree.xpath("//title/text()")
        return titles[0] if titles else "blank"

    def _initMetaInfo(self):
        '''Fetch the target and its robots.txt and fill self._metaInfo.'''
        self._metaInfo['url'] = self._url
        self._metaInfo['target'] = self._target
        try:
            response = http.get(self._target.uri)
        except http.ConnectionError:
            raise PenError("Can not connect to {0}".format(self._target.uri))
        else:
            self._metaInfo['statusCode'] = response.status_code
            self._metaInfo['headers'] = response.headers
            self._metaInfo['html'] = response.content
            self._metaInfo['title'] = self._getTitle(response.content)
        self._metaInfo['robots'] = ""
        try:
            response = http.get(self._target.baseURL+"robots.txt")
        except http.ConnectionError:
            # robots.txt is optional, best effort only
            pass
        else:
            if response.status_code == 200:
                self._metaInfo['robots'] = response.content

    def _initHandleFuncs(self):
        '''
        Initialize the matcher dict, keyed by the match kinds defined in
        app_fingerprint. Currently supported: uri, headers, html,
        requests, robots.

        Matcher contract:
            input:  fingerprint definition
            output: [match_place, match_str, version_info] where
                match_place (required) is url/headers/html/...,
                match_str (required) is the matched text and
                version_info (required) the extracted version;
                None when the information is unavailable.
        Example:
            self._matchHeaders(fp) -> ['headers_Server', 'Apache/2.4.18', '2.4.18']
        '''
        self._matchFuncs['uri'] = self._matchUri
        self._matchFuncs['headers'] = self._matchHeaders
        self._matchFuncs['html'] = self._matchHtml
        self._matchFuncs['requests'] = self._matchRequests
        self._matchFuncs['robots'] = self._matchRobots

    def identify(self):
        '''Match every application fingerprint and return the MatchsInfo result.'''
        self._result['meta'] = self._metaInfo
        for fkey, fvalue in self._fp['Applications'].iteritems():
            bestMatch = []
            for mkey, mvalue in fvalue['matchs'].iteritems():
                try:
                    func = self._matchFuncs[mkey]
                except KeyError:
                    self._log.error("the function which handle {0} dose not exists".format(mkey))
                    continue
                matchResult = func(mvalue)
                if matchResult[1]:
                    # prefer the first match that carries version info
                    if not bestMatch:
                        bestMatch = matchResult
                    elif matchResult[2] and not bestMatch[2]:
                        bestMatch = matchResult
            if bestMatch:
                for cat in fvalue['cats']:
                    self._result.appendMatch(cat, fkey, bestMatch)
                if 'implies' in fvalue:
                    # record implied applications too; unknown implies are ignored
                    try:
                        for i in fvalue['implies']:
                            for cat in self._fp['Applications'][i]['cats']:
                                self._result.appendMatch(cat, i, ['{0}_implies'.format(fkey), 'implies', None])
                    except KeyError:
                        pass
        return self._result

    def _matchUri(self, fp):
        '''Match the fingerprint against the request URI.'''
        match = re.search(stripPattern(fp), self._metaInfo['target'].uri, re.I)
        if match:
            subMatch = match.groups()[0] if match.groups() else None
            self._log.debug("match uri, {0}, {1}".format(match.group(), subMatch))
            return ['uri', match.group(), subMatch]
        return ['uri', None, None]

    def _matchHeaders(self, fp):
        '''Match every header fingerprint; prefer matches with version info.'''
        bestMatch = []
        for key, value in fp.iteritems():
            match = re.search(stripPattern(value), self._metaInfo['headers'].get(key.lower(), ""), re.I)
            if match:
                subMatch = match.groups()[0] if match.groups() else None
                if not bestMatch:
                    bestMatch = ['headers_{0}'.format(key), match.group(), subMatch]
                    self._log.debug("match headers, {0}".format(str(bestMatch)))
                elif subMatch and not bestMatch[2]:
                    bestMatch = ['headers_{0}'.format(key), match.group(), subMatch]
                    self._log.debug("match headers, {0}".format(str(bestMatch)))
        return bestMatch if bestMatch else ['headers', None, None]

    def _matchHtml(self, fp):
        '''Match the fingerprint (string or list) against the page HTML.'''
        if isinstance(fp, list):
            bestMatch = []
            for pattern in fp:
                match = re.search(stripPattern(pattern), self._metaInfo['html'], re.I|re.DOTALL)
                if match:
                    subMatch = match.groups()[0] if match.groups() else None
                    if not bestMatch:
                        bestMatch = ['html', match.group(), subMatch]
                        self._log.debug("match html, {0}".format(str(bestMatch)))
                    elif subMatch and not bestMatch[2]:
                        bestMatch = ['html', match.group(), subMatch]
                        self._log.debug("match html, {0}".format(str(bestMatch)))
            return bestMatch if bestMatch else ['html', None, None]
        match = re.search(stripPattern(fp), self._metaInfo['html'], re.I|re.DOTALL)
        if match:
            subMatch = match.groups()[0] if match.groups() else None
            self._log.debug("match html, {0}".format(str(['html', match.group(), subMatch])))
            return ['html', match.group(), subMatch]
        return ['html', None, None]

    def _matchRobots(self, fp):
        '''Match the fingerprint (string or list) against robots.txt.'''
        if isinstance(fp, list):
            bestMatch = []
            for pattern in fp:
                match = re.search(stripPattern(pattern), self._metaInfo['robots'], re.I|re.DOTALL)
                if match:
                    subMatch = match.groups()[0] if match.groups() else None
                    # fix: original built the tag as "'robots'.format(key)" where
                    # 'key' is undefined in this scope -> NameError on first match
                    if not bestMatch:
                        bestMatch = ['robots', match.group(), subMatch]
                        self._log.debug("match robots, {0}".format(str(bestMatch)))
                    elif subMatch and not bestMatch[2]:
                        bestMatch = ['robots', match.group(), subMatch]
                        self._log.debug("match robots, {0}".format(str(bestMatch)))
            return bestMatch if bestMatch else ['robots', None, None]
        match = re.search(stripPattern(fp), self._metaInfo['robots'], re.I|re.DOTALL)
        if match:
            subMatch = match.groups()[0] if match.groups() else None
            self._log.debug("match robots, {0}".format(str(['robots', match.group(), subMatch])))
            return ['robots', match.group(), subMatch]
        return ['robots', None, None]

    def _matchRequests(self, fp):
        '''
        Request-based matcher (enabled by cmsEnhance): every path listed
        in fp must answer 200 without the notFoundPattern for a match.
        '''
        if not self._cmsEnhance:
            return ['requests', None, None]
        matchs = []
        for line in fp:
            uri = self._metaInfo['target'].baseURL.rstrip("/") + line
            try:
                self._log.debug("matchRequests get {0}".format(uri))
                response = http.get(uri, allow_redirects=False)
            except http.ConnectionError:
                continue
            if response.status_code != 200:
                continue
            if self._notFoundPattern and self._notFoundPattern in response.content:
                continue
            self._log.debug("matchRequests got >>> {0}".format(uri))
            matchs.append(uri)
        if len(matchs) == len(fp):
            return ['requests', str(matchs), None]
        return ['requests', None, None]
class URIBruter(object):
    '''
    URI bruteforce (json data files).

    allowTypes: dictionary types, ["webbak","cfgbak","interestfile","webconsole"]
    keywords: keyword list used to generate the web backup dictionary
    exts: extension list automatically appended to generated entries
    '''

    allowTypes = ["webbak","cfgbak","interestfile","webconsole"]
    dirInfoFile = os.path.join(sys.path[0],"directory","files.json")
    javaConsoleFile = os.path.join(sys.path[0],"directory","java-webconsole.json")

    def __init__(self, types, keywords=None, exts=None, size="small"):
        self.types = types
        # copy the list: avoids the shared mutable-default-argument pitfall
        self.keywords = list(keywords) if keywords else []
        self.exts = exts if exts else ["php"]
        self.size = size
        self.dirInfo = self._loadDirInfo()
        self.log = Log("uribrute")

    def _genKeywordWebbakDict(self, dirInfo=None):
        '''Generate web backup file names from the user supplied keywords.'''
        dirInfo = dirInfo if dirInfo else self.dirInfo
        suffixList = dirInfo['web_bak_file']
        result = []
        for suffix in suffixList:
            for keyword in self.keywords:
                result.append("".join([keyword,suffix]))
                result.append("-".join([keyword,suffix]))
                result.append("_".join([keyword,suffix]))
        return [unicode(x) for x in self.keywords] + result

    def _loadJavaConsoleDict(self):
        '''Load HTTP java web-console URLs (except "/") from the json data file.'''
        result = []
        # fix: close the file handle instead of leaking it
        with open(self.javaConsoleFile, "r") as fd:
            javaConsoleInfo = json.load(fd)
        for server, consoles in javaConsoleInfo.iteritems():
            for console in consoles:
                if console['type'] == "http" and console['url'] != "/":
                    result.append(console['url'])
        return result

    def _loadDirInfo(self):
        '''
        Load the files.json data file, expand the '<ext>' placeholder and
        return the dirInfo dict (keyword backups added when configured).
        '''
        result = {}
        # fix: close the file handle instead of leaking it
        with open(self.dirInfoFile, "r") as fd:
            dirInfo = json.load(fd)
        for key, value in dirInfo.iteritems():
            result[key] = []
            for line in value:
                if "<ext>" in line:
                    for ext in self.exts:
                        result[key].append(line.replace("<ext>", ext))
                else:
                    result[key].append(line)
        if self.keywords:
            result['web_bak_file'] += self._genKeywordWebbakDict(result)
        return result

    def _dictIter(self):
        '''Yield dictionary entries for the configured types.'''
        if "webbak" in self.types:
            if self.size == "small":
                # small dictionaries skip the directory prefixes
                self.dirInfo['web_bak_dir'] = []
            for zdir in [""] + self.dirInfo['web_bak_dir']:
                for zfile in self.dirInfo['web_bak_file']:
                    for ext in self.dirInfo['web_bak_ext']:
                        if zdir:
                            yield "/"+zdir+"/"+zfile+ext
                        else:
                            yield "/"+zfile+ext
        if "cfgbak" in self.types:
            if self.size == "small":
                self.dirInfo['cfg_bak_dir'] = []
            for bdir in [""] + self.dirInfo['cfg_bak_dir']:
                for bfile in self.dirInfo['cfg_bak_file']:
                    for ext in self.dirInfo['cfg_bak_ext']:
                        if bdir:
                            yield "/"+bdir+"/"+bfile+ext
                        else:
                            yield "/"+bfile+ext
        if "webconsole" in self.types:
            for cdir in [""] + self.dirInfo['web_console_dir']:
                for cfile in self.dirInfo['web_console_file']:
                    if cdir:
                        yield "/"+cdir+cfile
                    else:
                        yield "/"+cfile
        if "interestfile" in self.types:
            for line in self.dirInfo['interest_file']:
                yield "/"+line
        if "jsp" in self.exts:
            for line in self._loadJavaConsoleDict():
                yield line

    def genDict(self):
        '''Generate the dictionary as a list.'''
        return [line for line in self._dictIter()]

    def _safeRequest(self, safeURL):
        '''Issue a harmless request, used to evade WAF-like devices.'''
        if not safeURL:
            return
        try:
            http.get(safeURL)
        except http.ConnectionError:
            # best effort only
            pass

    def bruteforce(self, baseURL, notFoundPattern=None, safeURL=None, timeout=10, delay=0):
        '''
        Bruteforce baseURL with the generated dictionary.

        @params:
            notFoundPattern: soft-404 pattern (also checked on redirects)
            safeURL: URL requested before each probe (WAF evasion)
            timeout: per-request timeout in seconds
            delay: seconds to sleep between probes
        @returns:
            list of URLs that answered 200 without the notFoundPattern
        '''
        matchs = []
        for line in self._dictIter():
            time.sleep(delay)
            self._safeRequest(safeURL)
            url = baseURL + line
            try:
                self.log.debug(u"request url '{0}'".format(url))
                response = http.get(url, timeout=timeout)
            except http.ConnectionError:
                continue
            if response.status_code != 200:
                continue
            # fix: hits were only logged/recorded in the branch where
            # notFoundPattern was unset; record every real 200 hit
            if notFoundPattern:
                if notFoundPattern in response.content:
                    continue
                # a redirected request may carry the soft-404 in its history
                if response.history and notFoundPattern in response.history[0].content:
                    continue
            self.log.debug(u"find available url '{0}'".format(url))
            matchs.append(url)
        return matchs
class URIBruter(object):
    '''
    URI bruteforce (self-contained configuration variant).

    Input:
        baseURL: base url
        stype: server type, one of asp/aspx/php/jsp; None means all types
        keywords: whitespace separated keywords used for backup file names
        exts: whitespace separated extra extensions
        notFoundPattern: soft-404 pattern; some sites always answer
            301/200, this pattern tells real hits apart
    '''

    infoFile = os.path.join(sys.path[0],"directory","files.json")
    javaConsoleFile = os.path.join(sys.path[0],"directory","java-webconsole.json")

    def __init__(self, baseURL=None, stype=None, keywords=None, exts=None,
                 notFoundPattern=None, safeURL=None, timeout=None, delay=None,
                 encode="utf-8"):
        if baseURL:
            if not baseURL.startswith("http"):
                raise PenError("URIBruter, baseURL format error, not startswith 'http'.")
            self.baseURL = baseURL.rstrip("/")
        else:
            self.baseURL = baseURL
        self.stype = stype
        self.keywords = keywords.split() if keywords else []
        self.userdefinedExt = exts.split() if exts else []
        self.defaultExt = ["asp","aspx","php","jsp"]
        self.notFoundPattern = notFoundPattern
        self.safeURL = safeURL
        self.timeout = int(timeout) if timeout else 10
        self.delay = float(delay) if delay else 0
        self.encode = encode if encode else "utf-8"
        self.info = self._loadInfoDB()
        self.log = Log("uribrute")

    def _genUserdefinedDict(self):
        '''Combine the user keywords with common backup name suffixes.'''
        suffixList = [u"web", u"www",u"webroot", u"wwwroot",u"backup",
                      u"back",u"bak",u"backupdata",u"0", u"1",u"aaa",
                      u"db", u"data", u"database", u"备份", u"网站备份"]
        result = []
        for suffix in suffixList:
            for keyword in self.keywords:
                result.append("".join([keyword,suffix]))
                result.append("-".join([keyword,suffix]))
                result.append("_".join([keyword,suffix]))
        return [unicode(x) for x in self.keywords] + result

    def _loadJavaConsole(self):
        '''Load HTTP java web-console URLs from the json data file.'''
        result = []
        # fix: close the file handle instead of leaking it
        with open(self.javaConsoleFile, "r") as fd:
            info = json.load(fd)
        for server, consoles in info.iteritems():
            for console in consoles:
                if console['type'] == "http":
                    result.append(console['url'])
        return result

    def _loadInfoDB(self):
        '''
        Load the files.json data file, expand the '<ext>' placeholder
        (per stype / user extensions) and return the info dict.
        '''
        result = {}
        # fix: close the file handle instead of leaking it
        with open(self.infoFile, "r") as fd:
            info = json.load(fd)
        for key, value in info.iteritems():
            result[key] = []
            for line in value:
                if "<ext>" in line:
                    if not self.stype:
                        for ext in self.defaultExt + self.userdefinedExt:
                            result[key].append(line.replace("<ext>", ext))
                    else:
                        if self.stype in self.defaultExt:
                            result[key].append(line.replace("<ext>", self.stype))
                        for ext in self.userdefinedExt:
                            result[key].append(line.replace("<ext>", ext))
                else:
                    result[key].append(line)
        if self.keywords:
            result['webzip_file'] += self._genUserdefinedDict()
        return result

    def _dictIter(self):
        '''Yield every dictionary entry.'''
        for zdir in [""] + self.info['webzip_dir']:
            for zfile in self.info['webzip_file']:
                for ext in self.info['webzip_ext']:
                    if zdir:
                        yield "/"+zdir+"/"+zfile+ext
                    else:
                        yield "/"+zfile+ext
        for bdir in [""] + self.info['backup_dir']:
            for bfile in self.info['backup_file']:
                for ext in self.info['backup_ext']:
                    if bdir:
                        yield "/"+bdir+"/"+bfile+ext
                    else:
                        yield "/"+bfile+ext
        for cdir in [""] + self.info['web_console_dir']:
            for cfile in self.info['web_console_file']:
                if cdir:
                    yield "/"+cdir+cfile
                else:
                    yield "/"+cfile
        if self.stype == "jsp":
            for line in self._loadJavaConsole():
                yield line
        for line in self.info['interesting_file']:
            yield "/"+line

    def genDict(self):
        '''Generate the dictionary as a list.'''
        return [line for line in self._dictIter()]

    def _safeRequest(self):
        '''Issue a harmless request to a random safeURL (WAF evasion).'''
        if not self.safeURL:
            return
        url = random.choice(self.safeURL.split())
        try:
            # fix: original called 'response.get' -- 'response' is an
            # undefined name here; the bare 'except:' silently hid the bug
            http.get(url, timeout=self.timeout)
        except http.RequestException:
            # best effort only
            pass

    def bruteforce(self):
        '''
        Bruteforce self.baseURL with the generated dictionary.

        Output:
            list of URLs that answered 200 without the notFoundPattern,
            or False when no baseURL was configured.
        '''
        if not self.baseURL:
            return False
        matchs = []
        for line in self._dictIter():
            time.sleep(self.delay)
            self._safeRequest()
            url = self.baseURL + line
            try:
                self.log.debug("request url '{0}'".format(url))
                response = http.get(url.encode(self.encode), timeout=self.timeout)
            except http.RequestException:
                continue
            if response.status_code != 200:
                continue
            # fix: hits were only logged/recorded in the branch where
            # notFoundPattern was unset; record every real 200 hit
            if self.notFoundPattern:
                if self.notFoundPattern in response.content:
                    continue
                # a redirected request may carry the soft-404 in its history
                if response.history and self.notFoundPattern in response.history[0].content:
                    continue
            self.log.debug("find available url '{0}'".format(url))
            matchs.append(url)
        return matchs