def parse_line(line):
    """
    :param line: current line read from file
    :type  line: str

    :return: Issue if match, else None
    """
    match = FINDBUGS_PATTERN.match(line)
    if match:
        if DEBUG:
            print(match.groupdict())
            print(match.groups())
        match2 = PATH_AND_FUNCTION_PATTERN.match(
            match.group("bug_path_and_function"))
        if match2:
            if DEBUG:
                print(match2.groupdict())
                print(match2.groups())
            message = Message(match.group("bug_message"))
            # findbugs has no column information
            point = Point(int(match.group("bug_line_number")), 0)
            function = Function(match2.group("bug_function_name"))
            path = match.group("bug_file_name")
            if match2.group("bug_class_path"):
                path = match2.group("bug_class_path").replace(".", "/") \
                    + "/" + path
            path = File(path, None)
            location = Location(path, function, point)
            return Issue(None, None, location, message, None, None)
def parse_pylint(lines):
    severity_opts = {
        'E': 'error',
        'F': 'fatal',
        'W': 'warning',
        'C': 'convention',
        'R': 'refactor',
    }
    for line in lines:
        match = LINE_RE.match(line)
        if match is None:
            continue
        info = match.groupdict()
        severity = severity_opts[info['err']]
        yield Issue(cwe=None,
                    testid=info['id'],
                    location=Location(file=File(info['path'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['col']))),
                    severity=severity,
                    message=Message(text=info['msg']),
                    notes=None,
                    trace=None)
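# Illustration only: parse_pylint() relies on a module-level LINE_RE whose
# named groups are 'path', 'line', 'col', 'err', 'id' and 'msg'.  The
# pattern below is a hypothetical stand-in (the real one is defined
# elsewhere in the module), assuming pylint output such as
#   "foo.py:10:4: [E0602(undefined-variable), main] Undefined variable 'x'"
import re

_EXAMPLE_LINE_RE = re.compile(
    r'^(?P<path>.+?):(?P<line>\d+):(?P<col>\d+): '
    r'\[(?P<err>[EFWCR])(?P<id>\d+)[^\]]*\] (?P<msg>.*)$')

_m = _EXAMPLE_LINE_RE.match(
    "foo.py:10:4: [E0602(undefined-variable), main] Undefined variable 'x'")
# _m.groupdict() -> {'path': 'foo.py', 'line': '10', 'col': '4',
#                    'err': 'E', 'id': '0602', 'msg': "Undefined variable 'x'"}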
def make_location_from_range(files, range_):
    # range_:
    # e.g.:
    # [{'col': 18, 'file': 0, 'line': 165},
    #  {'col': 21, 'file': 0, 'line': 165}]
    assert len(range_) == 2
    start = range_[0]
    end = range_[1]
    assert start['file'] == end['file']
    if start == end:
        point = make_point_from_plist_point(start)
        range_ = None
    else:
        point = None
        range_ = Range(start=make_point_from_plist_point(start),
                       end=make_point_from_plist_point(end))
    location = Location(
        file=File(givenpath=files[start['file']],
                  abspath=None),
        # FIXME: doesn't tell us function name
        # TODO: can we patch this upstream?
        function=Function(''),
        point=point,
        range_=range_)
    return location
def parse_roodi(lines):
    dic = {'should': 'warning',
           'missing': 'error',
           "Don't": 'critical'}
    for line in lines:
        info = LINE_RE.match(line)
        if info is not None:
            testid = str(id(line))
            # TODO find a pretty way to remove color escape codes
            path = info.group('path')[5:]
            message = info.group('msg')[:-5]
            severity = None
            for key in dic:
                if key in info.group('msg'):
                    severity = dic[key]
            yield Issue(cwe=None,
                        testid=testid,
                        location=Location(
                            file=File(path, None),
                            function=None,
                            point=Point(int(info.group('line')), 1)),
                        severity=severity,
                        message=Message(message),
                        notes=None,
                        trace=None)
def parse_lintian(lines, fpath):
    severities = {
        "w": "warning",
        "e": "error",
        "p": "pedantic",
        "i": "info",
        "x": "experimental",
        "o": "override",
    }
    for line in lines:
        if line.startswith("N:"):
            continue
        info = LINE_RE.match(line).groupdict()
        severity = info['severity'].lower()
        if severity in severities:
            severity = severities[severity]
        else:
            severity = severity.upper()
        yield Issue(cwe=None,
                    testid=info['testid'],
                    location=Location(file=File(fpath, None),
                                      function=None,
                                      point=None),
                    severity=severity,
                    message=Message(text=line),
                    notes=None,
                    trace=None)
def parse_plist(pathOrFile, analyzerversion=None, sut=None, file_=None,
                stats=None):
    """
    Given a .plist file emitted by clang-static-analyzer (e.g. via
    scan-build), parse it and return an Analysis instance
    """
    plist = plistlib.readPlist(pathOrFile)
    # We now have the .plist file as a hierarchy of dicts, lists, etc

    # Handy debug dump:
    if 0:
        pprint(plist)

    # A list of filenames, apparently referenced by index within
    # diagnostics:
    files = plist['files']

    generator = Generator(name='clang-analyzer',
                          version=analyzerversion)
    metadata = Metadata(generator, sut, file_, stats)
    analysis = Analysis(metadata, [])

    for diagnostic in plist['diagnostics']:
        if 0:
            pprint(diagnostic)

        cwe = None

        # TODO: we're not yet handling the following:
        #   diagnostic['category']
        #   diagnostic['type']
        message = Message(text=diagnostic['description'])

        loc = diagnostic['location']
        location = Location(
            file=File(givenpath=files[loc['file']],
                      abspath=None),
            # FIXME: doesn't tell us function name
            # TODO: can we patch this upstream?
            function=None,
            point=Point(int(loc['line']), int(loc['col'])))

        notes = None

        trace = make_trace(files, diagnostic['path'])

        issue = Issue(cwe,
                      None,  # FIXME: can we get at the test id?
                      location, message, notes, trace)

        analysis.results.append(issue)

    return analysis
def make_complex_analysis(self):
    """
    Construct an Analysis instance that uses all features
    """
    a = Analysis(
        metadata=Metadata(
            generator=Generator(name='cpychecker',
                                version='0.11'),
            sut=SourceRpm(name='python-ethtool',
                          version='0.7',
                          release='4.fc19',
                          buildarch='x86_64'),
            file_=File(givenpath='foo.c',
                       abspath='/home/david/coding/foo.c'),
            stats=Stats(wallclocktime=0.4)),
        results=[
            Issue(cwe=681,
                  testid='refcount-too-high',
                  location=Location(
                      file=File(givenpath='foo.c',
                                abspath='/home/david/coding/foo.c'),
                      function=Function('bar'),
                      point=Point(10, 15)),
                  message=Message(text='something bad involving pointers'),
                  notes=Notes('here is some explanatory text'),
                  trace=Trace([
                      State(location=Location(file=File('foo.c', None),
                                              function=Function('bar'),
                                              point=Point(7, 12)),
                            notes=Notes('first we do this')),
                      State(location=Location(file=File('foo.c', None),
                                              function=Function('bar'),
                                              point=Point(8, 10)),
                            notes=Notes('then we do that')),
                      State(location=Location(file=File('foo.c', None),
                                              function=Function('bar'),
                                              range_=Range(Point(10, 15),
                                                           Point(10, 25))),
                            notes=Notes('then it crashes here'))
                  ]),
                  severity='really bad',
                  customfields=CustomFields(foo='bar')),
        ],
        customfields=CustomFields(
            gccinvocation='gcc -I/usr/include/python2.7 -c foo.c'),
    )
    return a, a.results[0]
def parse_file(fileobj, sut=None, file_=None, stats=None):
    tree = ET.parse(fileobj)
    root = tree.getroot()

    node_cppcheck = root.find('cppcheck')
    version = node_cppcheck.get('version')
    node_errors = root.find('errors')

    generator = Generator(name='cppcheck',
                          version=version)
    metadata = Metadata(generator, sut, file_, stats)
    analysis = Analysis(metadata, [])

    for node_error in node_errors.findall('error'):
        # e.g.:
        # <error id="nullPointer" severity="error" msg="Possible null pointer dereference: end - otherwise it is redundant to check it against null." verbose="Possible null pointer dereference: end - otherwise it is redundant to check it against null.">
        #   <location file="python-ethtool/ethtool.c" line="139"/>
        #   <location file="python-ethtool/ethtool.c" line="141"/>
        # </error>
        testid = node_error.get('id')
        str_msg = node_error.get('msg')
        str_verbose = node_error.get('verbose')
        message = Message(text=str_msg)
        if str_verbose != str_msg:
            notes = Notes(str_verbose)
        else:
            notes = None

        location_nodes = list(node_error.findall('location'))
        for node_location in location_nodes:
            location = Location(
                file=File(node_location.get('file'), None),
                # FIXME: doesn't tell us function name
                # TODO: can we patch this upstream?
                function=None,
                # doesn't emit column
                point=Point(int(node_location.get('line')), 0))  # FIXME: bogus column
            issue = Issue(None, testid, location, message, notes, None,
                          severity=node_error.get('severity'))
            analysis.results.append(issue)

        if not location_nodes:
            customfields = CustomFields()
            if str_verbose != str_msg:
                customfields['verbose'] = str_verbose
            failure = Failure(failureid=testid,
                              location=None,
                              message=message,
                              customfields=customfields)
            analysis.results.append(failure)

    return analysis
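# Hypothetical usage of the cppcheck parse_file() above, fed a hand-written
# snippet shaped like a "cppcheck --xml-version=2" report (results/cppcheck
# plus results/errors/error/location); this is illustrative, not real
# cppcheck output.
import io

_CPPCHECK_SAMPLE = """<?xml version="1.0" encoding="UTF-8"?>
<results version="2">
  <cppcheck version="1.66"/>
  <errors>
    <error id="nullPointer" severity="error"
           msg="Possible null pointer dereference: end"
           verbose="Possible null pointer dereference: end">
      <location file="python-ethtool/ethtool.c" line="139"/>
    </error>
  </errors>
</results>"""

_analysis = parse_file(io.BytesIO(_CPPCHECK_SAMPLE.encode('utf-8')))
for _result in _analysis.results:
    print(_result.testid, _result.location.file.givenpath,
          _result.location.point.line)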
def make_state(event):
    """
    Construct a State instance from an event within the JSON
    """
    loc = Location(file=File(givenpath=event['filePathname'],
                             abspath=None),
                   function=None,
                   point=Point(int(event['lineNumber']), 0))
    notes = Notes(text=event['eventDescription'])
    return State(loc, notes)
def parse_piuparts(lines, path):
    obj = None
    info = None
    cur_msg = ""

    cat = {
        "dependency-is-messed-up": [
            "you have held broken packages",
        ],
        "conffile-stuff-sucks": ["owned by: .+"],
        "command-not-found": ["command not found|: not found"],
        "conffile-modified": [
            "debsums reports modifications inside the chroot"
        ],
    }

    def handle_obj(obj):
        for k, v in cat.items():
            for expr in v:
                if re.findall(expr, cur_msg) != []:
                    obj.testid = k
                    break
        # if obj.testid is None:
        #     print(cur_msg)
        #     raise Exception
        return obj

    for line in lines:
        if line.startswith(" "):
            cur_msg += "\n" + line.strip()
            continue

        match = LINE_INFO.match(line)
        if match is None:
            continue
        info = match.groupdict()
        if info['severity'] in ['DEBUG', 'DUMP', 'INFO']:
            continue

        if obj:
            yield handle_obj(obj)
            cur_msg = ""

        obj = Issue(cwe=None,
                    testid=None,
                    location=Location(file=File(path, None),
                                      function=None,
                                      point=None),
                    severity=info['severity'],
                    message=Message(text=""),
                    notes=None,
                    trace=None)

    if obj:
        yield handle_obj(obj)
def make_location_from_point(files, loc):
    # loc:
    # e.g. {'col': 2, 'file': 0, 'line': 130}
    location = Location(
        file=File(givenpath=files[loc['file']],
                  abspath=None),
        # FIXME: doesn't tell us function name
        # TODO: can we patch this upstream?
        function=Function(''),
        point=make_point_from_plist_point(loc))
    return location
def parse_json_v2(path):
    """
    Given a JSON file emitted by:
      cov-format-errors --json-output-v2=<filename>
    parse it and return an Analysis instance
    """
    with open(path) as f:
        js = json.load(f)
    if 0:
        pprint(js)

    generator = Generator(name='coverity')
    metadata = Metadata(generator, sut=None, file_=None, stats=None)
    analysis = Analysis(metadata, [])

    for issue in js['issues']:
        if 0:
            pprint(issue)

        cwe = None

        # Use checkerName (e.g. "RESOURCE_LEAK") for the testid:
        testid = issue['checkerName']

        # Use the eventDescription of the final event for the message:
        message = Message(text=issue['events'][-1]['eventDescription'])

        location = Location(
            file=File(givenpath=issue['mainEventFilePathname'],
                      abspath=None),
            function=Function(name=issue['functionDisplayName']),
            point=Point(int(issue['mainEventLineNumber']), 0))

        notes = None
        trace = make_trace(issue)

        customfields = CustomFields()
        for key in ['mergeKey', 'subcategory', 'domain']:
            if key in issue:
                customfields[key] = issue[key]

        issue = Issue(cwe, testid, location, message, notes, trace,
                      customfields=customfields)
        analysis.results.append(issue)

    return analysis
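# Hypothetical usage of parse_json_v2(): "out.json" stands in for a file
# written by "cov-format-errors --json-output-v2=out.json"; the attribute
# names used below (location.file.givenpath, location.point.line, testid,
# message.text) match the constructors used above.
_analysis = parse_json_v2('out.json')
for _issue in _analysis.results:
    _loc = _issue.location
    print('%s:%s %s: %s' % (_loc.file.givenpath, _loc.point.line,
                            _issue.testid, _issue.message.text))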
def parse_BugInstance(bugInstance):
    message = Message(bugInstance.find("LongMessage").text)
    # findbugs has no column information
    sourceLine = bugInstance.find("SourceLine")
    point = Point(int(sourceLine.get("start")), 0)
    function = bugInstance.find("Method").find("Message").text
    tmpIndex = function.rfind("In method ") + len("In method ") - 1
    function = Function(function[tmpIndex + 1:])
    path = sourceLine.get("sourcepath")
    path = File(path, None)
    location = Location(path, function, point)
    if DEBUG:
        print(str(location) + " " + str(message))
    return Issue(None, None, location, message, None, None)
def parse_pep8(lines):
    for line in lines:
        info = LINE_RE.match(line).groupdict()
        severity = "error" if info['err'].startswith("E") else "warning"
        yield Issue(cwe=None,
                    testid=info['err'],
                    location=Location(file=File(info['path'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['col']))),
                    severity=severity,
                    message=Message(text=line),
                    notes=None,
                    trace=None)
def parse_desktop_file_validate(lines):
    for line in lines:
        info = LINE_EXPR.match(line).groupdict()
        path = info['path']
        message = info['msg']
        severity = info['severity']
        yield Issue(cwe=None,
                    testid=None,
                    location=Location(file=File(path, None),
                                      function=None,
                                      point=None),
                    severity=severity,
                    message=Message(text=message),
                    notes=None,
                    trace=None)
def parse_jshint(lines):
    for line in lines:
        file_match = FILE_RE.match(line)
        if file_match:
            path = file_match.groupdict()

        line_match = LINE_RE.match(line)
        if line_match is None:
            continue
        info = line_match.groupdict()

        yield Issue(cwe=None,
                    testid=info['testid'],
                    location=Location(
                        file=File(path['file'], None),
                        function=None,
                        point=Point(int(info['line']), int(info['col']))),
                    severity=info['severity'],
                    message=Message(text=info['msg']),
                    notes=None,
                    trace=None)
def make_info(self):
    a = Analysis(
        metadata=Metadata(generator=Generator(name='an-invented-checker'),
                          sut=None, file_=None, stats=None),
        results=[
            Info(infoid='gimple-stats',
                 location=Location(file=File('bar.c', None),
                                   function=Function('sample_function'),
                                   point=Point(10, 15)),
                 message=Message('sample message'),
                 customfields=CustomFields(num_stmts=57,
                                           num_basic_blocks=10))
        ])
    return a, a.results[0]
def parse_perlcritic(lines):
    for line in lines:
        info = LINE_EXPR.match(line)
        if info is None:
            continue
        info = info.groupdict()
        yield Issue(cwe=None,
                    testid=info['testid'],
                    location=Location(file=File(info['file'], None),
                                      function=None,
                                      point=Point(int(info['line']),
                                                  int(info['column']))),
                    severity=info['severity'],
                    message=Message(text=info['message']),
                    notes=None,
                    trace=None)
def to_issue(self):
    """
    Generate an Issue from this csv row.
    """
    location = Location(file=File(givenpath=self.file,
                                  abspath=None),
                        function=None,  # FIXME
                        point=Point(int(self.line), int(self.column)))
    return Issue(cwe=None,
                 testid=self.flag_name,
                 location=location,
                 message=Message(self.warning_text),
                 notes=Notes(self.additional_text),
                 trace=None,
                 severity=self.priority,
                 customfields=None)
def make_failed_analysis(self):
    a = Analysis(
        metadata=Metadata(generator=Generator(name='yet-another-checker'),
                          sut=None, file_=None, stats=None),
        results=[
            Failure(failureid='out-of-memory',
                    location=Location(
                        file=File('foo.c', None),
                        function=Function('something_complicated'),
                        point=Point(10, 15)),
                    message=Message('out of memory'),
                    customfields=CustomFields(stdout='sample stdout',
                                              stderr='sample stderr',
                                              returncode=-9))  # (killed)
        ])
    return a, a.results[0]
def parse_warning(match_warning):
    """
    :param match_warning: the matched object
    :type  match_warning: SRE_Match

    :return: Issue
    """
    message = Message(match_warning.group('message'))
    point = Point(int(match_warning.group('line')), 0)
    path = File(match_warning.group('path'), None)
    location = Location(file=path, function=None, point=point)
    return Issue(cwe=None, testid=None, location=location, message=message,
                 notes=None, trace=None)
def parse_findbugs(payload):
    tree = lxml.etree.fromstring(payload)
    for result in tree.xpath("//BugCollection/BugInstance/Class/SourceLine"):
        keys = result.keys()
        p = None
        # Start is not always defined
        if 'start' in keys:
            line = result.attrib['start']
            p = Point(int(line), 0)
            if 'end' in keys:
                lineEnd = result.attrib['end']
                p = Point(int(line), int(lineEnd))

        if 'sourcepath' in keys:
            path = result.attrib['sourcepath']
        else:
            path = "unknown"

        message, = result.xpath("../../LongMessage")
        message = message.text

        testid = result.getparent().getparent().attrib['instanceHash']
        rank = int(result.getparent().getparent().attrib['rank'])
        if rank <= 4:
            severity = "scariest"
        elif rank <= 9:
            severity = "scary"
        elif rank <= 14:
            severity = "troubling"
        else:
            severity = "concern"

        yield Issue(cwe=None,
                    testid=testid,
                    location=Location(file=File(path, None),
                                      function=None,
                                      point=p),
                    severity=severity,
                    message=Message(text=message),
                    notes=None,
                    trace=None)
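# A minimal BugCollection document shaped the way parse_findbugs() walks it
# (BugInstance with rank/instanceHash attributes, a LongMessage child and a
# Class/SourceLine child).  This is an illustrative, hand-written payload,
# not real findbugs output.
_FINDBUGS_SAMPLE = b"""<BugCollection>
  <BugInstance type="NP_NULL_ON_SOME_PATH" rank="3"
               instanceHash="abc123">
    <LongMessage>Possible null pointer dereference</LongMessage>
    <Class classname="com.example.Foo">
      <SourceLine start="42" end="45"
                  sourcepath="com/example/Foo.java"/>
    </Class>
  </BugInstance>
</BugCollection>"""

for _issue in parse_findbugs(_FINDBUGS_SAMPLE):
    print(_issue.severity, _issue.testid, _issue.location.file.givenpath)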
def make_simple_analysis(self):
    """
    Construct a minimal Analysis instance
    """
    a = Analysis(
        metadata=Metadata(generator=Generator(name='cpychecker'),
                          sut=None, file_=None, stats=None),
        results=[
            Issue(cwe=None,
                  testid=None,
                  location=Location(file=File('foo.c', None),
                                    function=None,
                                    point=Point(10, 15)),
                  message=Message(text='something bad involving pointers'),
                  notes=None,
                  trace=None)
        ])
    return a, a.results[0]
def parse_adequate(lines):
    for line in lines:
        info = OUTPUT_REGEX.match(line).groupdict()
        testid = info['tag']
        severity = "error"
        pth = info['info'].split(" ", 1)
        pth = pth[0] if pth else None
        if pth is None:
            continue
        yield Issue(cwe=None,
                    testid=testid,
                    location=Location(file=File(pth, None),
                                      function=None,
                                      point=None),
                    severity=severity,
                    message=Message(text=line),
                    notes=None,
                    trace=None)
def parse_cppcheck(payload):
    tree = lxml.etree.fromstring(payload)
    for result in tree.xpath("//results/error"):
        if 'file' not in result.attrib:
            continue
        path = result.attrib['file']
        line = result.attrib['line']
        severity = result.attrib['severity']
        message = result.attrib['msg']
        testid = result.attrib['id']
        yield Issue(cwe=None,
                    testid=testid,
                    location=Location(
                        file=File(path, None),
                        function=None,
                        point=Point(int(line), 0) if line else None),
                    severity=severity,
                    message=Message(text=message),
                    notes=None,
                    trace=None)
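# A minimal report in the older cppcheck XML layout this parser expects,
# where <error> elements sit directly under <results> (matching the
# "//results/error" xpath above).  Illustrative payload, not real output.
_CPPCHECK_XML_SAMPLE = b"""<results>
  <error file="src/foo.c" line="12" id="nullPointer"
         severity="error" msg="Possible null pointer dereference: p"/>
</results>"""

for _issue in parse_cppcheck(_CPPCHECK_XML_SAMPLE):
    print(_issue.testid, _issue.severity, _issue.location.file.givenpath,
          _issue.location.point.line)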
def parse_warning(line, func_name):
    """
    :param line: current line read from file
    :type  line: basestring
    :param func_name: name of the current function
    :type  func_name: basestring

    :return: Issue if match, else None
    """
    match = GCC_PATTERN.match(line)
    if match:
        message = Message(match.group('message'))
        func = Function(func_name)
        try:
            column = int(match.group('column'))
        except ValueError:
            if match.group('column') == '':
                column = 0
            else:
                raise
        except TypeError:
            column = None
        switch_match = SWITCH_SUB_PATTERN.match(match.group('switch') or '')
        if switch_match:
            switch = switch_match.group('name')
        else:
            switch = None
        point = Point(int(match.group('line')), column)
        path = File(match.group('path'), None)
        location = Location(path, func, point)
        return Issue(None, switch, location, message, None, None)
def parse_file(infile):
    """
    Parse flawfinder output.

    :infile: file-like object
    :returns: Firehose Analysis object, representing the final XML.

    Flawfinder can report multiple CWEs for a single issue.  Firehose's
    model does not support multiple CWEs, so for now, when multiple CWEs
    occur, we keep only the first one.  An issue was created to track
    this limitation:
    https://github.com/fedora-static-analysis/firehose/issues/35
    """
    line = infile.readline()
    generator = Generator(name='flawfinder',
                          version=get_flawfinder_version(line))
    metadata = Metadata(generator, None, None, None)
    analysis = Analysis(metadata, [])

    # A regex for "filename:linenum:"
    ISSUE_LINE_PATTERN = r"(\S.*)\:([0-9]+)\:"
    # A regex for the reported severity, e.g. "[2]"
    ISSUE_SEVERITY_PATTERN = r"\[([0-9]+)\]"
    # A regex for the reported testid, e.g. "(buffer)"
    ISSUE_TESTID_PATTERN = r"\(([a-z]+)\)"
    WHITESPACE = r"\s+"
    FIRST_LINE_PATTERN = (ISSUE_LINE_PATTERN + WHITESPACE +
                          ISSUE_SEVERITY_PATTERN + WHITESPACE +
                          ISSUE_TESTID_PATTERN)
    prog = re.compile(FIRST_LINE_PATTERN)

    while line:
        m = prog.match(line)
        if m:
            issue_path = m.group(1)
            issue_line = m.group(2)
            issue_severity = m.group(3)
            testid = m.group(4)
            location = Location(file=File(issue_path, None),
                                function=None,
                                point=Point(int(issue_line), 0))

            message_line = infile.readline()
            issue_message = ""
            while not prog.search(message_line) and message_line != "\n":
                # Build up issue_message as one line, stripping out
                # extraneous whitespace.
                if issue_message:
                    issue_message += " " + message_line.strip()
                else:
                    issue_message = message_line.strip()
                message_line = infile.readline()
            line = message_line

            cwes = [int(cwe)
                    for cwe in re.findall("CWE-([0-9]+)", issue_message)]
            if cwes:
                first_cwe = cwes[0]
            else:
                first_cwe = None

            issue = Issue(first_cwe, testid, location,
                          Message(text=issue_message),
                          notes=None, trace=None,
                          severity=issue_severity, customfields=None)
            analysis.results.append(issue)
        else:
            line = infile.readline()

    return analysis
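# Hypothetical usage of the flawfinder parse_file() above.  The sample text
# is hand-written in the shape of flawfinder's text report (a version header,
# then "path:line:  [severity] (testid) message" blocks separated by blank
# lines); it was not captured from a real run.
import io

_FLAWFINDER_SAMPLE = (
    "Flawfinder version 1.31, (C) 2001-2014 David A. Wheeler.\n"
    "Examining foo.c\n"
    "foo.c:12:  [4] (buffer) strcpy:\n"
    "  Does not check for buffer overflows when copying to destination\n"
    "  (CWE-120). Consider using strlcpy or strncpy.\n"
    "\n"
)

_analysis = parse_file(io.StringIO(_FLAWFINDER_SAMPLE))
for _issue in _analysis.results:
    print(_issue.cwe, _issue.testid, _issue.severity, _issue.message.text)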
def parse_plist(pathOrFile, analyzerversion=None, sut=None, file_=None,
                stats=None):
    """
    Given a .plist file emitted by clang-static-analyzer (e.g. via
    scan-build), parse it and return an Analysis instance
    """
    plist = plistlib.readPlist(pathOrFile)
    # We now have the .plist file as a hierarchy of dicts, lists, etc

    # Handy debug dump:
    if 0:
        pprint(plist)

    # A list of filenames, apparently referenced by index within
    # diagnostics:
    files = plist['files']

    generator = Generator(name='clang-analyzer',
                          version=analyzerversion)
    metadata = Metadata(generator, sut, file_, stats)
    analysis = Analysis(metadata, [])

    if 'clang_version' in plist:
        generator.version = plist['clang_version']

    for diagnostic in plist['diagnostics']:
        if 0:
            pprint(diagnostic)

        cwe = None

        customfields = CustomFields()
        for key in ['category', 'issue_context', 'issue_context_kind']:
            if key in diagnostic:
                customfields[key] = diagnostic[key]

        message = Message(text=diagnostic['description'])

        loc = diagnostic['location']
        location = Location(
            file=File(givenpath=files[loc['file']],
                      abspath=None),
            # FIXME: doesn't tell us function name
            # TODO: can we patch this upstream?
            function=None,
            point=Point(int(loc['line']), int(loc['col'])))

        notes = None

        trace = make_trace(files, diagnostic['path'])

        issue = Issue(cwe,
                      # Use the 'type' field for the testid:
                      diagnostic['type'],
                      location, message, notes, trace,
                      customfields=customfields)

        analysis.results.append(issue)

    return analysis
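# Hypothetical usage: "report.plist" stands in for a file emitted by
# scan-build/clang-static-analyzer; the version argument is optional and is
# overridden by the plist's own 'clang_version' key when present.
_analysis = parse_plist('report.plist', analyzerversion='3.4')
for _issue in _analysis.results:
    print(_issue.testid, _issue.location.file.givenpath,
          _issue.location.point.line, _issue.message.text)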
def honorcxx(package, suite, arch, analysis):
    chroot_name = '{0}-{1}-honorcxx'.format(suite, arch)
    with schroot(chroot_name) as chroot:
        # Let's install the real compilers
        out, err, code = chroot.run(['apt-get', 'update'],
                                    user='******')
        out_, err, code = chroot.run(
            ['apt-get', '-y', '--no-install-recommends',
             'install', 'gcc', 'cpp', 'g++'],
            user='******')
        out += out_
        out += err

        # Let's create fake-compiler
        with chroot.create_file(fake_compiler_path,
                                user='******') as fake_compiler_file:
            fake_compiler_file.write(fake_gcc)
        out_, err, code = chroot.run(['chmod', '755', fake_compiler_path],
                                     user='******')
        out += err

        # Let's create the fake gcc
        out_, err, code = chroot.run(['rm', '-f', '/usr/bin/gcc'],
                                     user='******')
        out += err
        out_, err, code = chroot.run(['rm', '-f', '/usr/bin/g++'],
                                     user='******')
        out += err
        out_, err, code = chroot.run(['rm', '-f', '/usr/bin/cpp'],
                                     user='******')
        out += err

        with chroot.create_file('/usr/bin/gcc',
                                user='******') as fake_gcc_file:
            fake_gcc_file.write(fake_gcc)
        out_, err, code = chroot.run(['chmod', '755', '/usr/bin/gcc'],
                                     user='******')
        out += err

        for bin in ['gcc', 'g++', 'cpp']:
            out_, err, code = chroot.run(
                ['ln', '-s', '/usr/bin/gcc', '/usr/bin/{0}'.format(bin)],
                user='******')
            out += err
            for version in gcc_versions:
                out_, err, code = chroot.run(
                    ['rm', '-f', '/usr/bin/{0}-{1}'.format(bin, version)],
                    user='******')
                out += err
                out_, err, code = chroot.run(
                    ['ln', '-s', '/usr/bin/gcc',
                     '/usr/bin/{0}-{1}'.format(bin, version)],
                    user='******')
                out += err
                out_, err, code = chroot.run(
                    ['sh', '-c',
                     'echo {0}-{1} hold | dpkg --set-selections'.format(
                         bin, version)],
                    user='******')
                out += err
                out_, err, code = chroot.run(
                    ['sh', '-c',
                     'echo {0}-{1}-base hold | dpkg --set-selections'.format(
                         bin, version)],
                    user='******')
                out += err

        # cleanup previous result file
        if os.path.exists('/tmp/no-honor-cxx'):
            os.remove('/tmp/no-honor-cxx')

        # let's go
        out_, _, _ = run_command([
            'sbuild',
            '-A',
            '--use-schroot-session', chroot.session,
            "-v",
            "-d", suite,
            "-j", "1",
            package,
        ])
        out += out_

        failed = False
        if os.path.exists('/tmp/no-honor-cxx'):
            failed = True
            # FIXME: firewoes complains, some data might be missing
            # File "/home/clemux/dev/debian/firewoes/firewoes/lib/debianutils.py",
            #   line 70, in get_source_url
            # >>> url_quote(message)))
            # TypeError: %d format: a number is required, not Undefined
            analysis.results.append(
                Issue(
                    cwe=None,
                    testid='0',
                    location=Location(file=File('n/a', None),
                                      function=None),
                    message=Message(text='Package does not honor CC/CXX'),
                    severity='error',
                    notes=None,
                    trace=None,
                ))
            os.remove('/tmp/no-honor-cxx')

        return (analysis, out, failed, None, None)