Esempio n. 1
0
    def analysePatterns(self, currentScript, res, identifier, path):
        """Run every detection pattern against ``res`` and record the matches.

        Args:
            currentScript: Script that collects the matches, or None to have
                one created lazily on the first match.
            res: Content handed to ``PatternChecker.checkPattern``.
            identifier: Passed to ``Script.Script`` when a script has to be
                created.
            path: Passed through to ``PatternChecker.checkPattern``.

        Returns:
            The script the matches were recorded on; ``currentScript``
            unchanged (possibly None) when nothing matched; or None as soon
            as a check reports -1 (corrupt file contents), regardless of any
            matches recorded before that point.
        """
        for detectionPattern in self.detectionPatterns:
            for pattern in detectionPattern.patterns:
                for regEx in pattern.patterns:
                    # regEx can be a list, tuple or str
                    returnValue = PatternChecker.checkPattern(res, regEx, path)
                    result = returnValue[0]

                    # File contents are corrupt: skip this file. This has to
                    # be tested before the truthiness check because -1 is
                    # itself truthy.
                    if result == -1:
                        return None

                    if not result:
                        continue

                    if currentScript is None:
                        currentScript = Script.Script(identifier, res)

                    self.addScriptToCollection(currentScript,
                                               detectionPattern, pattern,
                                               returnValue)

                    # A determinative match ends the whole analysis.
                    if pattern.determinative:
                        return currentScript
        return currentScript
Esempio n. 2
0
    def analyseCode(self, data, identifier, path):
        """Analyse one script and keep it in ``self.scripts`` if it scores high enough.

        The data is preprocessed with ``FileManager.preProcessScript`` and run
        through ``analysePatterns``. A resulting script whose score is at
        least 12 gets ``path`` as its URL, is optionally checked against the
        manually found literals, and is appended to ``self.scripts``.

        Args:
            data: Raw script contents.
            identifier: Identifier forwarded to ``analysePatterns`` and shown
                in the progress output.
            path: Location of the script; stored as the script's URL.

        Returns:
            None. Results are stored on ``self.scripts``.
        """
        res = FileManager.preProcessScript(data)
        script = self.analysePatterns(None, res, identifier, path)
        if not script:
            return

        script.calculateDetectionValue()
        if script.score >= 12:
            script.URL = path

            # Now that a pattern was detected: is it a familiar one
            # (only for unknown detection patterns)?
            # NOTE(review): checkForRepeatingPatterns is read as an attribute
            # here; if it is actually a method this is always true - confirm.
            if script.checkForRepeatingPatterns:
                for literal in self.manuallyFoundLiterals.patterns:
                    outcome = PatternChecker.checkPattern(res, literal[1],
                                                          path)
                    if outcome[0]:
                        script.addRepeatingPattern(literal)

            self.scripts.append(script)
            print("\n@append: %s %s %s" %
                  (len(self.scripts), identifier, script.score))
Esempio n. 3
0
    def checkParentStyle(self, driver, identifier, element):
        """Collect parent-style pattern hits from the nearest matching ancestor.

        Looks up the parent of ``element`` via the XPath ``".."`` and tests
        its inline ``style`` attribute against
        ``self.inlineStylePatterns.parentPatterns``. When that parent yields
        no hit, the search continues recursively with the parent's own parent
        until a hit is found or no parent can be obtained.

        Args:
            driver: Only forwarded to the recursive call.
            identifier: Passed through to ``PatternChecker.checkPattern``.
            element: Element whose ancestors are inspected; it must offer
                ``find_element_by_xpath``.

        Returns:
            A list with ``pattern[1]`` of every parent pattern that matched
            on the first ancestor producing any match, or an empty list.
        """
        patterns = []
        try:
            parentElement = element.find_element_by_xpath("..")
        except Exception:
            # No parent could be obtained, so the walk ends here. Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            return patterns

        style = self.getElementAtt(parentElement, 'style')
        if style:
            for pattern in self.inlineStylePatterns.parentPatterns:
                # Each pattern contributes at most once: any() stops at the
                # first of its values that matches.
                if any(
                        PatternChecker.checkPattern(style, patternValue,
                                                    identifier)[0]
                        for patternValue in pattern[2]):
                    patterns.append(pattern[1])

        # External (stylesheet) properties of the parent are not checked here.

        if not patterns:  # nothing on this parent: check its parent
            patterns = self.checkParentStyle(driver, identifier, parentElement)

        return patterns
Esempio n. 4
0
 def checkInlineStyle(self, identifier, style):
     """Return ``pattern[1]`` for every inline-style pattern value matching ``style``.

     Every value in ``pattern[2]`` of every entry in
     ``self.inlineStylePatterns.patterns`` is tested with
     ``PatternChecker.checkPattern``. An entry contributes once per matching
     value, so the same ``pattern[1]`` can appear several times.

     Args:
         identifier: Passed through to ``PatternChecker.checkPattern``.
         style: Inline style text to test.

     Returns:
         List of ``pattern[1]`` items, in pattern and value order.
     """
     return [
         entry[1]
         for entry in self.inlineStylePatterns.patterns
         for candidate in entry[2]
         if PatternChecker.checkPattern(style, candidate, identifier)[0]
     ]
Esempio n. 5
0
def downloadFile(url):
        """Download ``url`` and return its decoded content unless the file is excluded.

        Args:
            url: Address of the script. A protocol-relative address
                (starting with ``//``) is fetched over ``http:``.

        Returns:
            ``(content, fileName, url)`` for a downloaded file that does not
            match ``config['excludeFiles']``; None when the download fails
            with ``MaxRetryError``, when no data is left after decompression,
            or when the file name is on the exclude list.
        """
        # TODO: Needs to be switched with a recent user agent each start
        http_header = {
                # Adjacent literals instead of a backslash continuation inside
                # the string, which embedded the next line's indentation in
                # the header value.
                'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) '
                               'AppleWebKit/537.11 (KHTML, like Gecko) '
                               'Chrome/23.0.1271.64 Safari/537.11'),
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
                'Accept-Encoding': 'none',
                'Accept-Language': 'en-US,en;q=0.8',
                'Connection': 'keep-alive'}

        # A src attribute may not contain a http(s) prefix
        if url.startswith('//'):
            url = 'http:' + url

        try:
            http = urllib3.PoolManager()
            # headers must be passed by keyword: the third positional
            # parameter of request() is not the header mapping.
            r = http.request('GET', url, headers=http_header)
        except urllib3.exceptions.MaxRetryError:
            print("Could not download script at: {}".format(url))
            return None

        data = r.data
        contentEncoding = r.headers.get("Content-Encoding")

        # The file name is needed by decompressData, so it has to be known
        # before the decompression step.
        fileName = _extractFileName(url)

        if contentEncoding:
            data = decompressData(data, contentEncoding, fileName)

        if data is None:
            print("No content found {}".format(url))
            return None

        content = decodeData(data)

        # Exclude common scripts, that are known frameworks and should not do bot detection. Currently: JQuery, bootstrap and underscore
        if not PatternChecker.analyse(fileName, config['excludeFiles'], 'FileManagerExludeFiles', True, True):
            return (content, fileName, url)

        return None

    if data == None:
        print("No content found %s" % (src))
        return data

    fileName = extractFileName(src)

    #de-compress http request content
    if contentEncoding:
        data = decompressData(data, contentEncoding, fileName)

    # decode contents
    content = decodeData(data)

    if not PatternChecker.analyse(fileName, config['excludeFiles'],
                                  'FileManagerExludeFiles', True, True):
        return (content, fileName, src)

    return None


# Preprocess raw data: (1) remove comments, (2) convert hexadecimal contents
def preProcessScript(data):
    #        try:
    #            res = jsbeautifier.beautify(data)
    #        except:
    #            print("Could not beautify %s" % (identifier))
    #            res = data

    #Remove comments
    try:
Esempio n. 7
0
def checkPropertyValueByRegEx(propertyValue, pattern):
    """Check ``propertyValue`` against a single pattern.

    Wraps ``pattern`` in a one-element list and delegates to
    ``PatternChecker.analyse`` under the ``'ExternalStylePattern'`` name,
    returning whatever that call returns.
    """
    candidates = [pattern]
    return PatternChecker.analyse(propertyValue, candidates,
                                  'ExternalStylePattern', False, False)