def analysePatterns(self, currentScript, res, identifier, path):
    """Check a pre-processed script against every configured detection pattern.

    Args:
        currentScript: Script object that collects matches, or None. A new
            ``Script.Script(identifier, res)`` is created on the first match
            when None is passed.
        res: Pre-processed script contents handed to the pattern checker.
        identifier: Forwarded to ``Script.Script`` when a script is created.
        path: Forwarded to ``PatternChecker.checkPattern``.

    Returns:
        The script object holding the recorded matches, the unchanged
        ``currentScript`` argument when nothing matched, or None as soon as
        the checker reports -1 for this file.
    """
    for detectionPattern in self.detectionPatterns:
        for pattern in detectionPattern.patterns:
            # NOTE: per the original comment, an entry can be a list, tuple or str.
            for regEx in pattern.patterns:
                returnValue = PatternChecker.checkPattern(res, regEx, path)
                result = returnValue[0]

                if result == -1:
                    # File contents are corrupt: skip this file entirely.
                    # None is returned even if earlier patterns already matched.
                    return None

                if not result:
                    continue

                if currentScript is None:
                    currentScript = Script.Script(identifier, res)
                self.addScriptToCollection(
                    currentScript, detectionPattern, pattern, returnValue)

                # A determinative pattern settles the verdict; stop scanning.
                if pattern.determinative:
                    return currentScript

    return currentScript
def analyseCode(self, data, identifier, path, minScore=12):
    """Analyse one script and keep it when its detection score is high enough.

    The raw data is pre-processed, matched against the detection patterns and
    scored. Scripts whose score reaches ``minScore`` get their ``URL`` set to
    ``path``, are optionally checked against the manually found literals, and
    are appended to ``self.scripts``.

    Args:
        data: Raw script contents, passed to ``FileManager.preProcessScript``.
        identifier: Identifier forwarded to ``analysePatterns`` and printed
            when the script is kept.
        path: Location of the script; stored as ``URL`` on a kept script and
            forwarded to the pattern checker.
        minScore: Minimum ``score`` a script needs to be kept. Defaults to 12,
            the previously hard-coded threshold.

    Returns:
        None. Results are recorded in ``self.scripts``.
    """
    res = FileManager.preProcessScript(data)
    currentScript = self.analysePatterns(None, res, identifier, path)
    if not currentScript:
        # Nothing matched (or the file was reported as corrupt).
        return

    currentScript.calculateDetectionValue()
    if currentScript.score >= minScore:
        currentScript.URL = path

        # Now that a pattern was detected: is it a familiar one?
        # (only for unknown detection patterns)
        # NOTE(review): checkForRepeatingPatterns is read as an attribute, not
        # called - if it is a method this test is always true; confirm.
        if currentScript.checkForRepeatingPatterns:
            for pattern in self.manuallyFoundLiterals.patterns:
                if PatternChecker.checkPattern(res, pattern[1], path)[0]:
                    currentScript.addRepeatingPattern(pattern)

        self.scripts.append(currentScript)
        print("\n@append: %s %s %s" % (len(self.scripts), identifier, currentScript.score))
def checkParentStyle(self, driver, identifier, element):
    """Find parent-style patterns on the nearest ancestor of ``element``.

    Walks up from ``element`` one parent at a time (XPath ``".."``). For each
    ancestor with a non-empty ``style`` attribute, every entry of
    ``self.inlineStylePatterns.parentPatterns`` is tested; an entry matches
    when any of its values in ``pattern[2]`` matches the style. The walk stops
    at the first ancestor that yields at least one match.

    Args:
        driver: Unused here; kept so existing callers keep working.
        identifier: Forwarded to ``PatternChecker.checkPattern``.
        element: Element whose ancestors are inspected; must provide
            ``find_element_by_xpath``.

    Returns:
        A list with ``pattern[1]`` of every matching entry for the first
        matching ancestor, or an empty list when no parent can be resolved
        before a match is found.
    """
    current = element
    while True:
        try:
            parentElement = current.find_element_by_xpath("..")
        except Exception:
            # No parent could be resolved (e.g. the document root was reached).
            # NOTE(review): this also hides an AttributeError if the element
            # API lacks find_element_by_xpath - confirm the Selenium version.
            return []

        style = self.getElementAtt(parentElement, 'style')
        if style:
            patterns = [
                pattern[1]
                for pattern in self.inlineStylePatterns.parentPatterns
                # any() stops at the first matching value, so each entry is
                # reported at most once.
                if any(
                    PatternChecker.checkPattern(style, patternValue, identifier)[0]
                    for patternValue in pattern[2]
                )
            ]
            if patterns:
                return patterns

        # Nothing found on this ancestor: continue with its own parent.
        current = parentElement
def checkInlineStyle(self, identifier, style):
    """Collect the inline-style patterns that match ``style``.

    Every value in ``pattern[2]`` of each entry in
    ``self.inlineStylePatterns.patterns`` is tested against ``style``.

    Args:
        identifier: Forwarded to ``PatternChecker.checkPattern``.
        style: Inline style text to test.

    Returns:
        A list holding ``pattern[1]`` once per matching value, so an entry
        with several matching values appears several times.
    """
    return [
        entry[1]
        for entry in self.inlineStylePatterns.patterns
        for candidate in entry[2]
        if PatternChecker.checkPattern(style, candidate, identifier)[0]
    ]
def downloadFile(url):
    """Download a script and return its decoded contents.

    Args:
        url: Location of the script. A protocol-relative URL (``//host/...``)
            is fetched over ``http:``.

    Returns:
        A ``(content, fileName, url)`` tuple, or None when the download
        fails, no data is available, or the file name matches
        ``config['excludeFiles']``.
    """
    # TODO: Needs to be switched with a recent user agent each start
    http_header = {
        'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 '
                       '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }

    # A src attribute may not contain a http(s) prefix
    if url.startswith('//'):
        url = 'http:' + url

    try:
        http = urllib3.PoolManager()
        # Headers must be passed by keyword: the third positional argument of
        # request() is not the header dict.
        r = http.request('GET', url, headers=http_header)
    except urllib3.exceptions.HTTPError:
        # Base class of urllib3's errors, including MaxRetryError.
        print("Could not download script at: {}".format(url))
        return None

    data = r.data
    # Resolve the file name first; decompressData() needs it.
    fileName = _extractFileName(url)

    contentEncoding = r.headers.get("Content-Encoding")
    if contentEncoding:
        data = decompressData(data, contentEncoding, fileName)

    if data is None:
        print("No content found {}".format(url))
        return None

    content = decodeData(data)

    # Exclude common scripts, that are known frameworks and should not do bot
    # detection. Currently: JQuery, bootstrap and underscore
    if not PatternChecker.analyse(fileName, config['excludeFiles'], 'FileManagerExludeFiles', True, True):
        return (content, fileName, url)
    return None
return if data == None: print("No content found %s" % (src)) return data fileName = extractFileName(src) #de-compress http request content if contentEncoding: data = decompressData(data, contentEncoding, fileName) # decode contents content = decodeData(data) if not PatternChecker.analyse(fileName, config['excludeFiles'], 'FileManagerExludeFiles', True, True): return (content, fileName, src) return None #Preprocess rawdata : 1 remove comment 2 convert hexadecimal contents def preProcessScript(data): # try: # res = jsbeautifier.beautify(data) # except: # print("Could not beautify %s" % (identifier)) # res = data #Remove comments try:
def checkPropertyValueByRegEx(propertyValue, pattern):
    """Test a single property value against a single pattern.

    Wraps ``pattern`` in a one-element list and delegates to
    ``PatternChecker.analyse`` under the ``'ExternalStylePattern'`` label.

    Args:
        propertyValue: Value to test.
        pattern: Pattern to test the value against.

    Returns:
        Whatever ``PatternChecker.analyse`` returns for this value.
    """
    candidatePatterns = [pattern]
    # NOTE(review): the meaning of the two False flags is defined by
    # PatternChecker.analyse and is not visible here.
    outcome = PatternChecker.analyse(
        propertyValue, candidatePatterns, 'ExternalStylePattern', False, False)
    return outcome