def _first_match(self, searchTarget, positiveSearches, negativeSearches, negativeFirst=False):
    '''
    Return the first positive match for searchTarget that is not vetoed
    by a negative search.

    negativeFirst selects the order of evaluation: when set, the negative
    searches run first and the positive searches are only tried if none of
    them hit; otherwise the positive searches run first and the negative
    searches are only consulted once a positive match exists.

    Returns whatever _find_positive_match returned (the key of the search
    and its match object) when the match stands, otherwise None.
    The hit counts held in the search dicts are updated in place by the
    helper methods.
    '''
    if log.level():
        log.search(4, "Searching: {}".format(searchTarget))

    if negativeFirst:
        # A negative hit rules the target out before any positive search runs
        if self._is_negative_match(searchTarget, negativeSearches):
            return None
        return self._find_positive_match(searchTarget, positiveSearches)

    found = self._find_positive_match(searchTarget, positiveSearches)
    if found and self._is_negative_match(searchTarget, negativeSearches):
        return None
    return found
def _measure_line_impl(self, line, strippedLine):
    '''
    Update the counts of the active block with the metrics of one line.

    line is the raw text of the line and strippedLine is the stripped form
    of it supplied by the caller. All pattern searches use strippedLine,
    except the decision search, which uses line when _includeStringContent
    is set. Nothing is returned; self.counts is updated in place.
    '''
    block = self._activeBlock
    counts = self.counts

    # Fold the line into the running Adler-32 value with spaces removed, so
    # that lines differing only in spacing contribute the same value (used
    # as a potential duplicate code capture for trivial changes).
    # NOTE: str.replace('', ' ') would instead insert a space between every
    # character, which keeps spacing differences rather than removing them.
    counts['nbncCRC'][block] = zlib.adler32(
        line.replace(' ', '').encode(), counts['nbncCRC'][block])

    # Capture some additional per-line metrics
    counts['Semicolons'][block] += strippedLine.count(';')

    if self.reImports.search(strippedLine):
        counts['Imports'][block] += 1
        if self._logLevel:
            log.search(3, "import: {}".format(line))

    if self.reClass.search(strippedLine):
        counts['Classes'][block] += 1
        if self._logLevel:
            log.search(2, "class: {}".format(line))

    if self.rePreprocessor.search(strippedLine):
        counts['Preprocessor'][block] += 1
        if self._logLevel:
            log.search(3, "preprocessor: {}".format(line))

    # Skip per-file routine and decision metrics if routines are being
    # measured, as these will be more accurately captured there
    if self.measuringRoutines:
        return

    if self.reDefaultRoutine.search(strippedLine):
        counts['Routines'][block] += 1
        if self._logLevel:
            log.search(2, "routine: {}".format(line))

    # String content only takes part in decision matching when requested
    decisionLine = line if self._includeStringContent else strippedLine
    if self.reDecision.search(decisionLine):
        counts['Decisions'][block] += 1
        if self._logLevel:
            log.search(3, "decision: {}".format(line))
def add_param(self, param, rawParam):
    '''
    Default handling of a config param: treat it as a regular expression
    that may carry a positive or negative prefix.

    The param is stripped of surrounding whitespace, a leading
    NEG_CONFIG_PREFIX or POS_CONFIG_PREFIX is removed, and the remainder is
    compiled with the search flags of this instance.

    Returns a tuple of (isPositive, rawParam with whitespace runs collapsed
    to single spaces, compiled regular expression).
    '''
    pattern = param.strip()
    isPositive = not pattern.startswith(self.NEG_CONFIG_PREFIX)

    if not isPositive:
        pattern = pattern[len(self.NEG_CONFIG_PREFIX):]
    elif pattern.startswith(self.POS_CONFIG_PREFIX):
        pattern = pattern[len(self.POS_CONFIG_PREFIX):]

    compiledRe = re.compile(pattern, self._searchReFlags)
    log.search(
        2, "Adding {} Search: {} ({})".format(isPositive, pattern, self._searchReFlags))

    normalizedRaw = ' '.join(rawParam.split())
    return (isPositive, normalizedRaw, compiledRe)
def _is_negative_match(self, searchTarget, negativeSearches):
    '''
    Report whether any negative search hits searchTarget.

    Each value of negativeSearches is a two item sequence holding the
    compiled regular expression and its hit count. The searches are tried
    in dict order; on the first hit the count of that entry is incremented
    in place and True is returned. False is returned when nothing hits.
    '''
    for entry in negativeSearches.values():
        pattern, hits = entry
        if log.level():
            log.search(
                4, " NegativeCheck: {} > {}".format(pattern.pattern, searchTarget))

        hit = pattern.search(searchTarget)
        if not hit:
            continue

        entry[1] = hits + 1
        if log.level():
            log.search(
                4, " NegativeHit: {} > {}".format(str(hit.group()), pattern.pattern))
        return True

    return False
def _find_positive_match(self, searchTarget, positiveSearches):
    '''
    Find the first positive search that hits searchTarget.

    Each value of positiveSearches is a two item sequence holding the
    compiled regular expression and its hit count. The searches are tried
    in dict order; on the first hit the count of that entry is incremented
    in place and a tuple of (dict key, match object) is returned.
    None is returned when nothing hits.
    '''
    for key, entry in positiveSearches.items():
        pattern, hits = entry
        if log.level():
            log.search(
                4, " PositiveCheck: {} > {}".format(searchTarget, pattern.pattern))

        hit = pattern.search(searchTarget)
        if not hit:
            continue

        entry[1] = hits + 1
        if log.level():
            log.search(
                2, "PositveHit: {} > {}".format(str(hit.group()), pattern.pattern))
        return key, hit

    return None
def _detect_block_change(self, line, analysis):
    '''
    Decide whether this line leaves the active block or enters a new one.

    Blocks do not nest: once a block is active it stays active until its
    end RE matches a line. The end RE may be None, in which case the block
    is never closed by this method. A block whose start and end both match
    the same line is flagged as single-line and is closed when the next
    line is examined.

    When the active block differs from the one at entry,
    _block_change_event is called with the line, the analysis argument
    (passed through untouched) and the previous block number.

    Returns True or False for whether the block changed, or None when
    detection is off or the line holds one of the ignore markers.
    '''
    if not self._use_block_detection:
        return

    # The file-level marker turns block detection off for later calls too
    if self.blockIgnoreFile and self.blockIgnoreFile in line:
        self._use_block_detection = False
        return

    # The line-level marker leaves the block state exactly as it is
    if self.blockChangeIgnore and self.blockChangeIgnore in line:
        return

    previousBlock = self._activeBlock

    if previousBlock > 0 and self._activeBlockIsSingleLine:
        # The PREVIOUS line opened and closed a block; clear that state and
        # evaluate this line again from the default block (a single re-entry,
        # since the active block is now 0)
        self._activeBlockIsSingleLine = False
        self._activeBlock = 0
        self._activeBlockEndRe = None
        return self._detect_block_change(line, analysis)

    if previousBlock > 0:
        # Inside a block: only its own end RE can close it
        closeRe = self._activeBlockEndRe
        if closeRe is not None and closeRe.search(line):
            self._activeBlock = 0
            self._activeBlockEndRe = None
            if self._logLevel:
                log.search(
                    3, "endblock: {} ==> {}".format(closeRe.pattern, line))
    else:
        # In the default block: the first detector whose start RE matches
        # wins, trying block numbers in ascending order starting at 1
        for candidate in range(1, len(self.blockDetectors)):
            opened = False
            for detector in self.blockDetectors[candidate]:
                openRe = detector[self.BLOCK_START]
                if not openRe.search(line):
                    continue

                closeRe = detector[self.BLOCK_END]
                self._activeBlock = candidate
                self._activeBlockEndRe = closeRe
                if self._logLevel:
                    log.search(
                        3, "startblock: {} ==> {}".format(openRe.pattern, line))

                # Note if block closed on the same line
                if closeRe is not None and closeRe.search(line):
                    self._activeBlockIsSingleLine = True
                    if self._logLevel:
                        log.search(
                            3, "endblockSameline: {} ==> {}".format(closeRe.pattern, line))

                opened = True
                break
            if opened:
                break

    blockChanged = previousBlock != self._activeBlock
    if blockChanged:
        self._block_change_event(line, analysis, previousBlock)
    return blockChanged
def _routine_analyze_impl(self, line, analysis):
    '''
    Collect per-routine metrics for one line.

    A routine beginning is identified by _detect_routine_start; when
    _current_routine_ended reports that the current routine is over, its
    information is saved, the routine counts are reset and details of the
    new position (line, line number, column, approximate nesting) are
    cached in self.currentRoutine.

    Every line is then searched for decisions, escapes, cases and booleans,
    and the matching counters are incremented. Nothing is returned.
    '''
    # Estimate nesting from the indentation of the tab-expanded line; the
    # nesting relative to the routine is taken against the routine that is
    # current BEFORE any routine change below
    expandedLine = line.expandtabs(self.routineAvgIndent)
    indentDepth = len(expandedLine) - len(expandedLine.lstrip())
    nestingApprox = int(indentDepth / self.routineAvgIndent)
    routineNest = nestingApprox - self.currentRoutine['LineIndent']

    # Strip literals and assembly comments to avoid mistaken hits
    strippedLine = self._strip_inlines(self._strip_blanks_and_strings(line))

    # Is this line the start of a routine?
    routineStartMatch = self._detect_routine_start(line)
    if self._current_routine_ended(line, routineStartMatch, indentDepth):
        self._save_routine_info(analysis, self._activeBlock)
        self._foundFirstRoutineSinceTransition = True
        self._reset_routine_counts()

        # Cache information about this new routine definition
        self.currentRoutine['Line'] = line
        self.currentRoutine['LineNum'] = sum(self.counts['RawLines'])
        self.currentRoutine['LineCol'] = indentDepth
        self.currentRoutine['LineIndent'] = nestingApprox

        if routineStartMatch:
            origPatternStr, match = routineStartMatch
            self.currentRoutine['Name'] = utils.get_match_string(match)
            self.currentRoutine['RegEx'] = (
                origPatternStr, utils.get_match_pattern(match))
            self.counts['Routines'][self._activeBlock] += 1
            if self._logLevel:
                log.code(1, "RoutineStart({})=> {}".format(
                    self.currentRoutine['LineNum'], self.currentRoutine['Line']))
                log.search(3, " re: {} => name: {}".format(
                    self.currentRoutine['RegEx'][0][:40], self.currentRoutine['Name']))
        elif self._logLevel:
            log.code(1, "RoutineEnd({})=> {}".format(
                self.currentRoutine['LineNum'], self.currentRoutine['Line']))

    # String content only takes part in the searches below when requested
    if self._includeStringContent:
        complexLine = line
    else:
        complexLine = strippedLine

    # If there are decision matches for the line
    decisionHit = self.reDecision.search(complexLine)
    if decisionHit:
        if self._logLevel:
            log.search(2, "decision: {}".format(utils.get_match_string(decisionHit)))
        self.counts['Decisions'][self._activeBlock] += 1
        self.currentRoutine['Decisions'] += 1

        # Check for the maximum indentation (as an indication of nesting depth)
        if routineNest > self.currentRoutine['MaxIndent']:
            self.currentRoutine['MaxIndent'] = routineNest

    # The remaining per-routine counters share one shape: search, log, count
    simpleChecks = (
        (self.reEscapes, "escape: {}", 'Escapes'),
        (self.reCases, "case: {}", 'Cases'),
        (self.reBooleans, "boolean: {}", 'Booleans'),
    )
    for pattern, logFormat, counterName in simpleChecks:
        hit = pattern.search(complexLine)
        if not hit:
            continue
        if self._logLevel:
            log.search(3, logFormat.format(utils.get_match_string(hit)))
        self.currentRoutine[counterName] += 1