Esempio n. 1
0
class Service(object):
    '''
    服务识别
    '''
    def __init__(self, url, notFoundPattern=None, cmsEnhance=False):
        '''
        Set up a service-identification run for a single URL.

        The constructor validates the URL, loads the fingerprint file,
        fetches the page meta information (this performs HTTP requests) and
        registers the matcher functions.

        url: target URL; surrounding whitespace is stripped.
        notFoundPattern: optional text; _matchRequests skips a 200 response
            whose body contains it.
        cmsEnhance: when true, _matchRequests actively probes extra paths.

        Raises PenError if the URL fails URL.check, or (from _initMetaInfo)
        if the target cannot be reached.
        '''
        self._url = url.strip()
        self._notFoundPattern = notFoundPattern
        self._cmsEnhance = cmsEnhance
        if not URL.check(self._url):
            raise PenError("Service Identify, URL format error")

        self._target = URL.format(self._url)

        # The fingerprint file is resolved relative to sys.path[0].
        self._fp = YamlConf(os.path.join(sys.path[0],"script","data","app_fingerprint.yaml"))

        # metaInfo: page meta information
        # url, statusCode, headers, html, title, robots
        self._metaInfo = {}
        self._initMetaInfo()
        # result: the information collected by identify()
        self._result = MatchsInfo(self._fp)

        self._matchFuncs = {}
        self._initHandleFuncs()

        self._log = Log("service_identify")


    def _getTitle(self, html):
        tree = etree.HTML(html)
        titles = tree.xpath("//title/text()")
        if titles:
            return titles[0]
        else:
            return "blank"


    def _initMetaInfo(self):
        '''
        Fill self._metaInfo with the data the matchers work on.

        Stores url, target, statusCode, headers, html and title from a GET
        of the target URI, plus robots (the body of robots.txt, or "" when
        it cannot be fetched or does not answer 200).

        Raises PenError when the target URI cannot be reached.
        '''
        info = self._metaInfo
        info['url'] = self._url
        info['target'] = self._target

        try:
            page = http.get(self._target.uri)
        except http.ConnectionError:
            raise PenError("Can not connect to {0}".format(self._target.uri))
        info['statusCode'] = page.status_code
        info['headers'] = page.headers
        info['html'] = page.content
        info['title'] = self._getTitle(page.content)

        # robots.txt is optional: a connection failure or a non-200 answer
        # simply leaves the empty default in place.
        info['robots'] = ""
        try:
            robots = http.get(self._target.baseURL+"robots.txt")
        except http.ConnectionError:
            return
        if robots.status_code == 200:
            info['robots'] = robots.content



    def _initHandleFuncs(self):
        '''
        初始化匹配函数字典,对应app_figerprint中的match定义
        目前支持:uri, headers, html, requests, robots
        matchFunc定义:
            输入:fingerprint定义
            输出:[match_place,match_str,version_info]
                match_place(必须)为url/headers/html等,match_str(必须)为匹配信息,version_info(必须)为版本信息
                如果没有相关信息则为None
            例如:
                self._matchHeaders(fp)  返回  ['headers_Server', 'Apache/2.4.18', '2.4.18']
        '''
        self._matchFuncs['uri'] = self._matchUri
        self._matchFuncs['headers'] = self._matchHeaders
        self._matchFuncs['html'] = self._matchHtml
        self._matchFuncs['requests'] = self._matchRequests
        self._matchFuncs['robots'] = self._matchRobots



    def identify(self):
        self._result['meta'] = self._metaInfo
        
        for fkey,fvalue in self._fp['Applications'].iteritems():
            bestMatch = []
            for mkey,mvalue in fvalue['matchs'].iteritems():
                try:
                    func = self._matchFuncs[mkey]
                except KeyError:
                    self._log.error("the function which handle {0} dose not exists".format(mkey))
                    continue
                matchResult = func(mvalue)
                if matchResult[1]:
                    if not bestMatch:
                        bestMatch = matchResult
                    else:
                        if matchResult[2] and not bestMatch[2]:
                            bestMatch = matchResult
            if bestMatch:
                for cat in fvalue['cats']:
                    self._result.appendMatch(cat,fkey,bestMatch)
                    #self._result['apps'][cat].append([fkey]+bestMatch)
                if 'implies' in fvalue:
                    try:
                        for i in fvalue['implies']:
                            for cat in self._fp['Applications'][i]['cats']:
                                #self._result['apps'][cat].append([i]+['{0}_implies'.format(fkey),'implies',None])
                                self._result.appendMatch(cat,i,['{0}_implies'.format(fkey),'implies',None])
                    except KeyError:
                        pass

        return self._result



    def _matchUri(self, fp):
        '''
        Match the fingerprint pattern against the target URI.

        fp: pattern; it is passed through stripPattern before being used in
            a case-insensitive search.
        Returns ['uri', matched_text, first_group] on a hit (first_group is
        None when the pattern defines no groups), else ['uri', None, None].
        '''
        found = re.search(stripPattern(fp), self._metaInfo['target'].uri, re.I)
        if not found:
            return ['uri', None, None]

        groups = found.groups()
        version = groups[0] if groups else None
        self._log.debug("match uri, {0}, {1}".format(found.group(), version))
        return ['uri', found.group(), version]


    def _matchHeaders(self, fp):
        bestMatch = []
        for key,value in fp.iteritems():
            match = re.search(stripPattern(value), self._metaInfo['headers'].get(key.lower(),""), re.I)
            if match:
                subMatch = match.groups()[0] if match.groups() else None
                if not bestMatch:
                    bestMatch = ['headers_{0}'.format(key), match.group(), subMatch]
                    self._log.debug("match headers, {0}".format(str(bestMatch)))
                else:
                    if subMatch and not bestMatch[2]:
                        bestMatch = ['headers_{0}'.format(key), match.group(), subMatch]
                        self._log.debug("match headers, {0}".format(str(bestMatch)))

        return bestMatch if bestMatch else ['headers',None,None]


    def _matchHtml(self, fp):
        if isinstance(fp,list):
            bestMatch = []
            for pattern in fp:
                match = re.search(stripPattern(pattern), self._metaInfo['html'], re.I|re.DOTALL)
                if match:
                    subMatch = match.groups()[0] if match.groups() else None
                    if not bestMatch:
                        bestMatch = ['html', match.group(), subMatch]
                        self._log.debug("match html, {0}".format(str(bestMatch)))
                    else:
                        if subMatch and not bestMatch[2]:
                            bestMatch = ['html', match.group(), subMatch]
                            self._log.debug("match html, {0}".format(str(bestMatch)))
            return bestMatch if bestMatch else ['html',None,None]
        else:
            match = re.search(stripPattern(fp), self._metaInfo['html'], re.I|re.DOTALL)
            if match:
                subMatch = match.groups()[0] if match.groups() else None
                self._log.debug("match html, {0}".format(str(['html', match.group(), subMatch])))
                return ['html', match.group(), subMatch]
            else:
                return ['html', None, None]


    def _matchRobots(self, fp):
        if isinstance(fp,list):
            bestMatch = []
            for pattern in fp:
                match = re.search(stripPattern(pattern), self._metaInfo['robots'], re.I|re.DOTALL)
                if match:
                    subMatch = match.groups()[0] if match.groups() else None
                    if not bestMatch:
                        bestMatch = ['robots'.format(key), match.group(), subMatch]
                        self._log.debug("match robots, {0}".format(str(bestMatch)))
                    else:
                        if subMatch and not bestMatch[2]:
                            bestMatch = ['robots'.format(key), match.group(), subMatch]
                            self._log.debug("match robots, {0}".format(str(bestMatch)))
            return bestMatch if bestMatch else ['robots',None,None]
        else:
            match = re.search(stripPattern(fp), self._metaInfo['robots'], re.I|re.DOTALL)
            if match:
                subMatch = match.groups()[0] if match.groups() else None
                self._log.debug("match robots, {0}".format(str(['robots', match.group(), subMatch])))
                return ['robots', match.group(), subMatch]
            else:
                return ['robots', None, None]


    def _matchRequests(self, fp):
        if not self._cmsEnhance:
            return ['requests', None, None]
        matchs = []
        for line in fp:
            uri = self._metaInfo['target'].baseURL.rstrip("/") + line
            try:
                self._log.debug("matchRequests get {0}".format(uri))
                response = http.get(uri, allow_redirects=False)
            except http.ConnectionError:
                continue
            else:
                if response.status_code == 200:
                    if self._notFoundPattern:
                        if self._notFoundPattern in response.content:
                            continue
                        else:
                            self._log.debug("matchRequests got >>> {0}".format(uri))
                            matchs.append(uri)
                    else:
                        self._log.debug("matchRequests got >>> {0}".format(uri))
                        matchs.append(uri)
                else:
                    continue

        if len(matchs) == len(fp):
            return ['requests', str(matchs), None]
        else:
            return ['requests', None, None]