class Service(object):
    '''
    Service identification.

    Fetches the target page (and its robots.txt) once, then matches the
    collected page metadata against the application fingerprints loaded
    from script/data/app_fingerprint.yaml.
    '''

    def __init__(self, url, notFoundPattern=None, cmsEnhance=False):
        '''
        Params:
            url: target URL; surrounding whitespace is stripped.
            notFoundPattern: optional marker; a 200 response whose body
                contains it is treated as "not found" by _matchRequests.
            cmsEnhance: when True, the 'requests' fingerprints are probed
                with extra HTTP requests.
        Raises:
            PenError: if the URL fails URL.check, or the target cannot be
                reached while collecting the page metadata.
        '''
        self._url = url.strip()
        self._notFoundPattern = notFoundPattern
        self._cmsEnhance = cmsEnhance

        if not URL.check(self._url):
            raise PenError("Service Identify, URL format error")

        self._target = URL.format(self._url)

        self._fp = YamlConf(os.path.join(sys.path[0], "script", "data", "app_fingerprint.yaml"))

        # Page meta information:
        # url, target, statusCode, headers, html, title, robots
        self._metaInfo = {}
        self._initMetaInfo()
        # Information stored in the result
        self._result = MatchsInfo(self._fp)

        self._matchFuncs = {}
        self._initHandleFuncs()

        self._log = Log("service_identify")

    @staticmethod
    def _asList(value):
        '''Return value unchanged if it is a list/tuple, else wrap it in a list.'''
        if isinstance(value, (list, tuple)):
            return value
        return [value]

    @staticmethod
    def _firstGroup(match):
        '''Return the first capture group of a regex match, or None if the pattern has no groups.'''
        groups = match.groups()
        return groups[0] if groups else None

    @staticmethod
    def _prefer(best, candidate):
        '''
        Choose between the current best match and a new candidate.

        The candidate wins when there is no best match yet, or when it
        carries version info (index 2) and the current best does not.
        '''
        if not best:
            return candidate
        if candidate[2] and not best[2]:
            return candidate
        return best

    def _getTitle(self, html):
        '''Return the text of the first <title> element of html, or "blank" if there is none.'''
        if not html:
            return "blank"
        tree = etree.HTML(html)
        # Guard against the parser yielding no root element.
        if tree is None:
            return "blank"
        titles = tree.xpath("//title/text()")
        return titles[0] if titles else "blank"

    def _initMetaInfo(self):
        '''
        Fetch the target page and robots.txt and fill self._metaInfo.

        Raises:
            PenError: if the target page cannot be reached. A failure to
                fetch robots.txt is tolerated and leaves 'robots' empty.
        '''
        self._metaInfo['url'] = self._url
        self._metaInfo['target'] = self._target

        try:
            response = http.get(self._target.uri)
        except http.ConnectionError:
            raise PenError("Can not connect to {0}".format(self._target.uri))

        self._metaInfo['statusCode'] = response.status_code
        self._metaInfo['headers'] = response.headers
        self._metaInfo['html'] = response.content
        self._metaInfo['title'] = self._getTitle(response.content)

        self._metaInfo['robots'] = ""
        try:
            response = http.get(self._target.baseURL + "robots.txt")
        except http.ConnectionError:
            # robots.txt is optional; keep the empty default.
            return
        if response.status_code == 200:
            self._metaInfo['robots'] = response.content

    def _initHandleFuncs(self):
        '''
        Initialise the dict of match functions, keyed by the match types
        defined in app_fingerprint.
        Currently supported: uri, headers, html, requests, robots

        matchFunc contract:
            input: the fingerprint definition
            output: [match_place, match_str, version_info]
                match_place (required) is url/headers/html etc.,
                match_str (required) is the matched text,
                version_info (required) is the version information;
                a field is None when there is no such information.
        Example:
            self._matchHeaders(fp) returns ['headers_Server', 'Apache/2.4.18', '2.4.18']
        '''
        self._matchFuncs['uri'] = self._matchUri
        self._matchFuncs['headers'] = self._matchHeaders
        self._matchFuncs['html'] = self._matchHtml
        self._matchFuncs['requests'] = self._matchRequests
        self._matchFuncs['robots'] = self._matchRobots

    def identify(self):
        '''
        Run every fingerprint against the collected metadata.

        For each application the best result over all of its match types
        is recorded under each of the application's categories; the
        applications it implies are recorded as well.

        Returns:
            the MatchsInfo result object.
        '''
        self._result['meta'] = self._metaInfo

        applications = self._fp['Applications']
        for fkey, fvalue in applications.items():
            bestMatch = []
            for mkey, mvalue in fvalue['matchs'].items():
                try:
                    func = self._matchFuncs[mkey]
                except KeyError:
                    self._log.error("the function which handle {0} dose not exists".format(mkey))
                    continue
                matchResult = func(mvalue)
                # Only results that actually matched something compete.
                if matchResult[1]:
                    bestMatch = self._prefer(bestMatch, matchResult)

            if not bestMatch:
                continue

            for cat in fvalue['cats']:
                self._result.appendMatch(cat, fkey, bestMatch)

            if 'implies' in fvalue:
                # 'implies' is iterated as a list; a scalar value is wrapped
                # so it is treated as a single application name.
                for implied in self._asList(fvalue['implies']):
                    try:
                        for cat in applications[implied]['cats']:
                            self._result.appendMatch(
                                cat, implied, ['{0}_implies'.format(fkey), 'implies', None])
                    except KeyError:
                        # Unknown implied application: skip it, keep the others.
                        continue

        return self._result

    def _matchText(self, place, fp, text):
        '''
        Match one pattern or a list of patterns against text.

        Params:
            place: match_place label used in the result and the debug log.
            fp: a single pattern or a list of patterns.
            text: the text to search (case-insensitive, DOTALL).
        Returns:
            [place, match_str, version_info] of the preferred match, or
            [place, None, None] when nothing matched.
        '''
        best = []
        for pattern in self._asList(fp):
            match = re.search(stripPattern(pattern), text, re.I | re.DOTALL)
            if not match:
                continue
            candidate = [place, match.group(), self._firstGroup(match)]
            if self._prefer(best, candidate) is candidate:
                best = candidate
                self._log.debug("match {0}, {1}".format(place, str(best)))
        return best if best else [place, None, None]

    def _matchUri(self, fp):
        '''Match the fingerprint pattern against the target URI (case-insensitive).'''
        match = re.search(stripPattern(fp), self._metaInfo['target'].uri, re.I)
        if not match:
            return ['uri', None, None]
        subMatch = self._firstGroup(match)
        self._log.debug("match uri, {0}, {1}".format(match.group(), subMatch))
        return ['uri', match.group(), subMatch]

    def _matchHeaders(self, fp):
        '''
        Match a {header_name: pattern} mapping against the response headers.

        Returns:
            ['headers_<name>', match_str, version_info] of the preferred
            match, or ['headers', None, None] when nothing matched.
        '''
        headers = self._metaInfo['headers']
        best = []
        for key, value in fp.items():
            match = re.search(stripPattern(value), headers.get(key.lower(), ""), re.I)
            if not match:
                continue
            candidate = ['headers_{0}'.format(key), match.group(), self._firstGroup(match)]
            if self._prefer(best, candidate) is candidate:
                best = candidate
                self._log.debug("match headers, {0}".format(str(best)))
        return best if best else ['headers', None, None]

    def _matchHtml(self, fp):
        '''Match one pattern or a list of patterns against the page HTML.'''
        return self._matchText('html', fp, self._metaInfo['html'])

    def _matchRobots(self, fp):
        '''Match one pattern or a list of patterns against the robots.txt body.'''
        return self._matchText('robots', fp, self._metaInfo['robots'])

    def _matchRequests(self, fp):
        '''
        Probe the paths listed in the fingerprint on the target.

        Only active when cmsEnhance is enabled. The fingerprint matches
        only if every path answers 200 (without redirects being followed)
        and, when notFoundPattern is set, the body does not contain it.

        Params:
            fp: a path or a list of paths, each appended to the base URL.
        Returns:
            ['requests', str(list_of_matched_uris), None] on a match,
            otherwise ['requests', None, None].
        '''
        if not self._cmsEnhance:
            return ['requests', None, None]

        paths = self._asList(fp)
        # An empty path list must not count as "all paths matched".
        if not paths:
            return ['requests', None, None]

        base = self._metaInfo['target'].baseURL.rstrip("/")
        matchs = []
        for line in paths:
            uri = base + line
            self._log.debug("matchRequests get {0}".format(uri))
            try:
                response = http.get(uri, allow_redirects=False)
            except http.ConnectionError:
                response = None

            found = response is not None and response.status_code == 200
            if found and self._notFoundPattern and self._notFoundPattern in response.content:
                found = False
            if not found:
                # Every path has to match, so one miss settles the outcome;
                # stop here instead of sending further requests.
                return ['requests', None, None]

            self._log.debug("matchRequests got >>> {0}".format(uri))
            matchs.append(uri)

        return ['requests', str(matchs), None]