def get_latest_patchset():
    """Parse the diff of the most recent commit into a ``PatchSet``.

    ``@~..@`` compares HEAD against its parent; ``-U0`` drops all context
    lines so the patch contains only the changed lines.
    """
    raw = check_output(['git', 'diff', '-U0', '@~..@'])
    # Normalise Windows line endings before handing the bytes to unidiff.
    normalised = raw.replace(b'\r\n', b'\n')
    return PatchSet.from_string(normalised, encoding='utf-8')
def scan_diff(
    self,
    diff,
    baseline_filename='',
    last_commit_hash='',
    repo_name='',
):
    """Scan an incremental diff for secrets, merging findings into self.data.

    Scanning incrementally (rather than re-scanning the whole codebase)
    is an optimization; this function implements that strategy.

    :type diff: str
    :param diff: diff string, e.g. the output of `git diff <fileA> <fileB>`

    :type baseline_filename: str
    :param baseline_filename: if there are any baseline secrets, then the
        baseline file will have hashes in them. By specifying it, we can
        skip this clear exception.

    :type last_commit_hash: str
    :param last_commit_hash: used for logging only -- the last commit hash
        we saved

    :type repo_name: str
    :param repo_name: used for logging only -- the name of the repo
    """
    try:
        patch_set = PatchSet.from_string(diff)
    except UnidiffParseError:  # pragma: no cover
        # Log enough context to reproduce the failure, then propagate.
        log.error({
            'alert': 'UnidiffParseError',
            'hash': last_commit_hash,
            'repo_name': repo_name,
        })
        raise

    # Compile the exclusion pattern once, outside the file loop.
    exclusion = (
        re.compile(self.exclude_regex, re.IGNORECASE)
        if self.exclude_regex
        else None
    )

    for patch_file in patch_set:
        filename = patch_file.path
        # Skip files matching the exclusion pattern, and the baseline file
        # itself (it contains hashes that would trip the scanners).
        if exclusion and exclusion.search(filename):
            continue
        if filename == baseline_filename:
            continue
        for results, plugin in self._results_accumulator(filename):
            results.update(
                self._extract_secrets_from_patch(patch_file, plugin, filename),
            )
def test_patchset_from_bytes_string(self):
    """Parsing raw bytes with an explicit encoding must equal text parsing."""
    with codecs.open(self.sample_file, 'rb') as diff_file:
        raw_bytes = diff_file.read()
    from_bytes = PatchSet.from_string(raw_bytes, encoding='utf-8')

    with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
        from_text = PatchSet(diff_file)

    self.assertEqual(from_bytes, from_text)
def get_commit_filechanges(self, repo, sha):
    """Fetch the diff of commit *sha* in *repo* and return the transformed patchset."""
    response = self.get(
        BitbucketAPIPath.repository_diff.format(repo=repo, spec=sha),
        allow_text=True,
    )
    # The endpoint returns a raw unified diff as text.
    patch_set = PatchSet.from_string(response.text)
    return self.transform_patchset(patch_set)
def run(project,bugid,patch_no,randoop_tests,tmp_tracefile='tmp_d'):
    # Run the given randoop-generated tests against both the buggy and the
    # patched defects4j working copies, saving per-test trace files under
    # ../traces/<project><bugid>b_<patch_no>/{buggy,patched}/.
    # NOTE(review): built entirely on os.system with string concatenation;
    # assumes project/bugid/patch_no are shell-safe identifiers — confirm.
    tmp_tracefile+=project+bugid+patch_no+'run_trace_randoop.py'
    tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
    w_buggy=project+str(bugid)+'b'
    w_patched=w_buggy+'_'+patch_no
    # Locate the patch and the source/target files it touches (first file only).
    patchfile=os.path.join('../patches',patch_no)
    patch = PatchSet.from_filename(patchfile)
    source_file=patch[0].source_file
    target_file=patch[0].target_file
    # Back up the originals before instrumenting them in place.
    os.system('cp '+source_file+' '+source_file+'.bak')
    os.system('cp '+target_file+' '+target_file+'.bak')
    os.system('make instru_class ARGS="-S '+source_file+' -T '+tmp_tracefile+'"')
    os.system('make instru_class ARGS="-S '+target_file+ ' -T '+tmp_tracefile+' '+ ' -P '+patchfile+ ' -F '+target_file+'"')
    # Prepare the output directory layout for the collected traces.
    dir_path='../traces/'+w_patched
    if(os.path.exists(tmp_tracefile)):
        os.system('rm '+tmp_tracefile)
    os.system('mkdir '+dir_path)
    os.system('mkdir '+os.path.join(dir_path,'buggy'))
    os.system('mkdir '+os.path.join(dir_path,'patched'))
    test='randoop'
    # Archive with the randoop-generated test suite for this bug.
    testfile='../test_gen_randoop/'+project+'/randoop/'+bugid+'/'+project+'-'+bugid+'b-randoop.'+bugid+'.tar.bz2'
    # comp_flag: compile on the first iteration only; later runs pass -n
    # (skip compilation) to defects4j.
    comp_flag=True
    for Test_Case in randoop_tests:
        test='Randoop.'+Test_Case.strip()
        # Run the test against the buggy working copy; a zero exit status
        # means the trace file was produced and can be moved into place.
        if comp_flag:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -w '+w_buggy)
        else:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -n -w '+w_buggy)
        if status==0:
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'buggy','__'.join(test.split('::'))))
        # Same test against the patched working copy.
        if comp_flag:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -w '+w_patched)
        else:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -n -w '+w_patched)
        if status==0:
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'patched','__'.join(test.split('::'))))
        comp_flag=False
    # Restore the original (un-instrumented) sources.
    os.system('mv '+source_file+'.bak '+source_file)
    os.system('mv '+target_file+'.bak '+target_file)
def main():
    # CLI utility that inspects staged template files and un-stages files
    # whose only changes are auto-generated id/interval tags.
    # (Runtime strings such as argparse help texts are left as-is.)
    parser = argparse.ArgumentParser(description='Утилита для проверки ошибочно изменных файлов в индексе')
    parser.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__))
    parser.add_argument('-v', '--verbose', dest='verbose_count', action='count', default=0,
                        help='Increases log verbosity for each occurence.')
    parser.add_argument('--g', action='store_true', default=False,
                        help='Запустить чтение индекса из git и определить список файлов для разбора')
    args = parser.parse_args()
    log.setLevel(max(3 - args.verbose_count, 0) * 10)
    # Tags whose changes are considered machine-generated noise.
    taglistchange = ('<d3p1:id>',
                     '<d3p1:fullIntervalBegin>',
                     '<d3p1:fullIntervalEnd>',
                     '<d3p1:visualBegin>',
                     '<xr:TypeId>',
                     '<xr:ValueId>'
                     )
    if args.g is True:
        files = get_list_of_comitted_files()
        for file in files:
            # Only "*Template.xml" files are candidates for the check.
            if not file[-12:] == "Template.xml":
                continue
            data = get_diff_forfile(file)
            if data is None:
                log.error("diff file not exists {}".format(file))
                continue
            # NOTE(review): `data` is passed as a filename here — confirm
            # get_diff_forfile returns a path rather than diff text.
            pathc = PatchSet.from_filename(data, encoding='utf-8')
            for f in pathc.modified_files:
                log.debug('file is {}'.format(f))
                modifiedsource, modifiedtarget = [],[]
                # Collect only changed lines (those not prefixed with a space).
                for hunk in f:
                    modifiedsource = modifiedsource + list(filter(lambda x: not x[:1] == " ", hunk.source))
                    modifiedtarget = modifiedtarget + list(filter(lambda x: not x[:1] == " ", hunk.target))
                # Keep the changed lines that start with one of the noise tags.
                sourcetags = list(filter(lambda x: x[1:].strip().startswith(taglistchange), modifiedsource))
                targettags = list(filter(lambda x: x[1:].strip().startswith(taglistchange), modifiedtarget))
                log.debug(sourcetags)
                log.debug(targettags)
                # If any changed line is NOT a noise tag, the file has real
                # changes — keep it staged.
                if not (len(sourcetags) == len(modifiedsource) and \
                        len(targettags) == len(modifiedtarget) and \
                        len(sourcetags) == len(targettags)):
                    continue
                # All changes are noise: revert this file in the index.
                git_reset_file(file, 'HEAD')
                break
        replace_old_form_attr(files)
def run(project,bugid,patch_no,tmp_tracefile='tmp_a'):
    # Instrument the randoop test suite and the patched source methods,
    # then run the suite against the buggy working copy to collect
    # coverage traces under ../randoop_cover/.
    # NOTE(review): os.system + string concatenation throughout; assumes
    # shell-safe arguments — confirm.
    tmp_tracefile+=project+bugid+patch_no+'get_randoop_coverage'
    tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
    w_buggy=project+bugid+'b'
    test='randoop'
    # Unpack the generated suite, instrument its test cases, and re-archive.
    testfiledir='../test_gen_randoop/'+project+'/randoop/'+bugid+'/'
    targetfile=testfiledir+project+'-'+bugid+'b-randoop.'+bugid+'.instr.tar.bz2'
    testfile=testfiledir+project+'-'+bugid+'b-randoop.'+bugid+'.tar.bz2'
    targetdir=testfiledir+'suite'
    os.system('mkdir '+targetdir)
    os.system('tar xvf '+testfile+' -C '+targetdir)
    os.system('make TestCaseInstr ARGS="'+targetdir+' '+tmp_tracefile+' Randoop"')
    os.system('cd '+targetdir+' && tar -c ./* | bzip2 > ../'+project+'-'+bugid+'b-randoop.'+bugid+'.instr.tar.bz2')
    print(w_buggy+'_'+patch_no)
    # For every file the patch touches, find the first changed line of each
    # hunk and instrument the enclosing method.
    patch = PatchSet.from_filename('../patches/'+patch_no)
    souce_file_list=[]
    for filei in range(len(patch)):
        source_file=patch[filei].source_file
        souce_file_list.append(source_file)
        line_no_list=[]
        for hunki in range(len(patch[filei])):
            for i in range(len(patch[filei][hunki])):
                if not patch[filei][hunki][i].is_context:
                    # Line after the last context line before the change.
                    line_no_list.append(str(patch[filei][hunki][i-1].source_line_no+1))
                    break
        os.system('cp '+source_file+' '+source_file+'.bak')
        os.system('make MthdInstr ARGS="'+source_file+' '+tmp_tracefile+' '+','.join(line_no_list)+'"')
    if(os.path.exists(tmp_tracefile)):
        os.system('rm '+tmp_tracefile)
    # Run the instrumented suite and save the collected trace.
    os.system('defects4j test -s '+targetfile+' -w '+w_buggy)
    os.system('mv '+tmp_tracefile+' ../randoop_cover/'+w_buggy+'_'+patch_no+'.txt')
    # Restore original sources and drop the working copy.
    for source_file in souce_file_list:
        os.system('rm '+source_file)
        os.system('mv '+source_file+'.bak '+source_file)
    os.system('rm -rf '+w_buggy)
# Re-create a fresh buggy working copy (<project><bugid>b) for the next run.
os.system('defects4j checkout -p '+project+' -v '+bugid+'b -w '+project+bugid+'b')
def get_commit_filechanges(self, repo, sha):
    """Return the transformed unidiff patchset for commit *sha* of *repo*."""
    # The /diff endpoint yields a raw unified diff body.
    response = self.get(
        '/2.0/repositories/{}/diff/{}'.format(repo, sha),
        allow_text=True,
    )
    parsed = PatchSet.from_string(response.text)
    return self.transform_patchset(parsed)
def get_vulnerable_lines(dir):
    """Map each patched file under *dir* to its removed (vulnerable) line numbers.

    Reads ``<dir>/patch/file.patch`` and returns
    ``{absolute_path_under_app: [source_line_no, ...]}``.
    """
    # NOTE(review): parameter name shadows the builtin `dir`; kept so that
    # keyword callers keep working.
    flaw_dict = {}
    patch = PatchSet.from_filename(join(dir, 'patch/file.patch'), encoding='latin-1')
    for patched_file in patch:
        resolved = abspath(join(dir, 'app', patched_file.path))
        removed_lines = []
        for hunk in patched_file:
            for line in hunk:
                if line.is_removed:
                    removed_lines.append(line.source_line_no)
        flaw_dict[resolved] = removed_lines
    return flaw_dict
def test_parse_sample(self):
    """Parse sample file."""
    with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
        res = PatchSet(diff_file)
    # three file in the patch
    self.assertEqual(len(res), 3)
    # three hunks
    self.assertEqual(len(res[0]), 3)
    # first file is modified
    self.assertTrue(res[0].is_modified_file)
    self.assertFalse(res[0].is_removed_file)
    self.assertFalse(res[0].is_added_file)
    # Hunk 1: six additions, no deletions, a section header
    # (comment previously said "five", contradicting the assertion below)
    self.assertEqual(res[0][0].added, 6)
    self.assertEqual(res[0][0].removed, 0)
    self.assertEqual(res[0][0].section_header, 'Section Header')
    # Hunk 2: 2 additions, 8 deletions, no section header
    self.assertEqual(res[0][1].added, 2)
    self.assertEqual(res[0][1].removed, 8)
    self.assertEqual(res[0][1].section_header, '')
    # Hunk 3: four additions, no deletions, no section header
    self.assertEqual(res[0][2].added, 4)
    self.assertEqual(res[0][2].removed, 0)
    self.assertEqual(res[0][2].section_header, '')
    # Check file totals
    self.assertEqual(res[0].added, 12)
    self.assertEqual(res[0].removed, 8)
    # second file is added
    self.assertFalse(res[1].is_modified_file)
    self.assertFalse(res[1].is_removed_file)
    self.assertTrue(res[1].is_added_file)
    # third file is removed
    self.assertFalse(res[2].is_modified_file)
    self.assertTrue(res[2].is_removed_file)
    self.assertFalse(res[2].is_added_file)
    # PatchSet-level totals across all three files
    self.assertEqual(res.added, 21)
    self.assertEqual(res.removed, 17)
def fetch_changed_files(self):
    """Populate ``self.changed_files`` from the event's diff URL.

    Raises:
        Exception: when the event carries no diff URL.
    """
    if not self.diff_url:
        raise Exception("Diff URL cannot be find for event")
    response = get_with_retries(
        self.diff_url,
        sleep=RETRY_SLEEP,
    )
    response.raise_for_status()
    if "commits" in self.event and self.number == 0:
        # Push-style event: the endpoint returns JSON with a "files" list.
        diff = response.json()
        if "files" in diff:
            # Fix: build a set (was a list), so the attribute type is the
            # same in both branches and matches the sibling implementation
            # that uses {f.path for f in diff_object}.
            self.changed_files = {f["filename"] for f in diff["files"]}
    else:
        # PR-style event: the endpoint returns a raw unified diff.
        diff_object = PatchSet(response.text)
        self.changed_files = {f.path for f in diff_object}
def main():
    # Run the coccinelle scripts over the files touched by a commit range
    # and report violations on lines the range added. Returns the number
    # of violations found (used as the process exit status by the caller,
    # presumably — confirm).
    args = parse_args()
    if not args.commits:
        exit("missing commit range")
    # pylint does not like the 'sh' library
    # pylint: disable=too-many-function-args,unexpected-keyword-arg
    commit = sh.git("diff", args.commits, **sh_special_args)
    patch_set = PatchSet(commit)
    zephyr_base = os.getenv("ZEPHYR_BASE")
    violations = {}
    numViolations = 0
    for f in patch_set:
        # Only consider C sources/headers that still exist in the tree.
        # (Precedence: skip when (not .c and not .h) or missing on disk.)
        if not f.path.endswith(".c") and not f.path.endswith(".h") or not os.path.exists(zephyr_base + "/" + f.path):
            continue
        # Collect coccinelle findings for this file into `violations`,
        # keyed "path:line" (populated by parse_coccinelle).
        for script in coccinelle_scripts:
            script_path = os.getenv("ZEPHYR_BASE") + "/" + script
            cocci = sh.coccicheck(
                "--mode=report",
                "--cocci=" + script_path,
                f.path,
                **sh_special_args)
            parse_coccinelle(cocci, violations)
        # Report only findings on lines this diff added.
        for hunk in f:
            for line in hunk:
                if line.is_added:
                    violation = "{}:{}".format(f.path, line.target_line_no)
                    if violation in violations:
                        numViolations += 1
                        if args.output:
                            with open(args.output, "a+") as fp:
                                fp.write("{}:{}\n".format(
                                    violation, "\t\n".join(
                                        violations[violation])))
                        else:
                            print(
                                "{}:{}".format(
                                    violation, "\t\n".join(
                                        violations[violation])))
    return numViolations
def get_patched_class(patch_no):
    """Return the class name reported by `make PatchInfo` for patch *patch_no*.

    Reads the patch, collects the first changed line of each hunk in its
    first file, runs the PatchInfo make target on the corresponding source
    file, and returns the single line the target writes to a temp file.
    """
    patchfile = os.path.join('../../patches', patch_no)
    patch = PatchSet.from_filename(patchfile, encoding='utf-8')
    source_file = patch[0].source_file
    line_no_list = []
    tmp_file = 'tmp_result' + patch_no
    for hunki in range(len(patch[0])):
        for i in range(len(patch[0][hunki])):
            if not patch[0][hunki][i].is_context:
                # First changed line = line after the preceding context line.
                line_no_list.append(str(patch[0][hunki][i-1].source_line_no+1))
                break
    os.system('cd .. && make PatchInfo ARGS="'+os.path.join('../source/', source_file)+' '+tmp_file+' '+','.join(line_no_list)+'" >/dev/null')
    # Fix: use a context manager so the handle is closed even if
    # readlines()/indexing raises (the original leaked the handle).
    with open('../' + tmp_file) as f:
        res = f.readlines()[0].strip()
    os.system('rm ../' + tmp_file)
    return res
def _get_added(cls, diff):
    """Collect the added lines of each patched file that gains content.

    Returns a list of DiffContent(path, [Line, ...]) — one entry per file
    with a positive added-line count.
    """
    patch_set = PatchSet(StringIO(diff))
    diff_contents = []
    for patched in patch_set:
        if patched.added <= 0:
            continue
        file_lines = []
        for hunk in patched:
            for position, line in enumerate(hunk):
                if line.is_added:
                    # `position + 1` is the 1-based index within the hunk.
                    file_lines.append(Line(line.target_line_no, line.value, position + 1))
        diff_contents.append(DiffContent(patched.path, file_lines))
    return diff_contents
def test_samples(self):
    # Every sample diff must parse into 1 added, 1 removed and 1 modified
    # file with the expected line counts, and re-render byte-identically.
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    for fname in self.samples:
        file_path = os.path.join(tests_dir, 'samples', fname)
        with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
            res = PatchSet(diff_file)
        # 3 files updated by diff
        self.assertEqual(len(res), 3)
        # 1 added file
        added_files = res.added_files
        self.assertEqual(len(added_files), 1)
        self.assertEqual(added_files[0].path, 'added_file')
        # 1 hunk, 4 lines
        self.assertEqual(len(added_files[0]), 1)
        self.assertEqual(added_files[0].added, 4)
        self.assertEqual(added_files[0].removed, 0)
        # 1 removed file
        removed_files = res.removed_files
        self.assertEqual(len(removed_files), 1)
        self.assertEqual(removed_files[0].path, 'removed_file')
        # 1 hunk, 3 removed lines
        self.assertEqual(len(removed_files[0]), 1)
        self.assertEqual(removed_files[0].added, 0)
        self.assertEqual(removed_files[0].removed, 3)
        # 1 modified file
        modified_files = res.modified_files
        self.assertEqual(len(modified_files), 1)
        self.assertEqual(modified_files[0].path, 'modified_file')
        # 1 hunk, 3 added lines, 1 removed line
        self.assertEqual(len(modified_files[0]), 1)
        self.assertEqual(modified_files[0].added, 3)
        self.assertEqual(modified_files[0].removed, 1)
        # PatchSet-level totals
        self.assertEqual(res.added, 7)
        self.assertEqual(res.removed, 4)
        # check that original diffs and those produced
        # by unidiff are the same
        with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
            self.assertEqual(diff_file.read(), str(res))
def parse_diff(lines):
    """Extract the commit hash and first diff from `git log -p`-style lines.

    Returns (commit_hash, is_valid_sig, is_merge, patch).

    NOTE(review): is_valid_sig and is_merge are initialized but never set,
    so they are always False — confirm whether detection was intended.
    """
    is_merge = False
    is_valid_sig = False
    commit_hash = None
    diff_lines = []
    for line in lines:
        if line.startswith("commit"):
            _, commit_hash = line.split()
        # Once the diff has started, keep accumulating every line.
        if diff_lines:
            diff_lines.append(line)
        # get the rest of the diffs after the first one
        if line.startswith('diff') and not diff_lines:
            diff_lines = [line]
    patch = PatchSet(diff_lines)
    return commit_hash, is_valid_sig, is_merge, patch
def test_flake8_lint_a_py(app, caplog):
    # flake8 on a new file with a spacing violation ("a+ b") must report
    # exactly one problem at line 6 of a.py.
    diff = """diff --git a/a.py b/a.py
new file mode 100644
index 0000000..fdeea15
--- /dev/null
+++ b/a.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+
+def add(a, b):
+    return a+ b
"""

    context = TestContext('deepanalyzer/badwolf', None, 'pullrequest', 'message', {'commit': {
        'hash': '000000'
    }}, {'commit': {
        'hash': '111111'
    }}, pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='flake8', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'flake8'))
    patch = PatchSet(diff.split('\n'))
    # Stub out the VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = None
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.py'
    assert problem.line == 6
def create_annotations_from_patch(patch):
    # This is where the magic happens, a unified diff formatted patch
    # as for example created by git diff gets parsed and gets converted
    # into annotations
    annotations = list()
    try:
        pset = PatchSet(patch)
        for mod_files in pset.modified_files:
            # Parse hunks, create annotation
            for hunk in mod_files:
                a = Annotation(mod_files.path, hunk.source_start, hunk.target_start)
                # offset of the current line relative to the hunk start,
                # counted over target-side lines (added + context).
                offset = 0
                for line in hunk.target:
                    if line[0] == '+':
                        if a.start_new_annotation():
                            # A single hunk can contain multiple annotations; we create an annotation
                            # object for every section of added content
                            annotations.append(_post_process_annotation(a))
                            b = Annotation(mod_files.path, a.source_start, a.target_start)
                            # copy lines from the previous annotation for context
                            [b.addContext(item, False) for item in a.context]
                            a = b
                        a.addContext([
                            hunk.target_start + offset,
                            format(line[1:].rstrip('\n'))
                        ], True)
                    elif line[0] not in ['+', '-']:
                        # Unchanged line: record it as context.
                        a.addContext([
                            hunk.source_start + offset,
                            format(line[1:].rstrip('\n'))
                        ], False)
                    # keep track of the offset compared to the hunk
                    # (removed lines do not advance the target side)
                    if line[0] != '-':
                        offset += 1
                annotations.append(_post_process_annotation(a))
    except Exception:
        # NOTE(review): deliberately broad — any parse failure aborts the
        # program with exit code 2 after printing the traceback.
        traceback.print_exc(file=sys.stdout)
        sys.exit(2)
    return annotations
def commit(self, request):
    """Render the commit detail page, including a parsed diff when possible.

    Reads 'repo' and 'commit' from request params; the diff is computed
    against the first parent, so root commits (no parents) get no diff.
    """
    template = self.view.get_template("repo/commit.html")
    repo = Repo.retrieve(name=request.params['repo'])
    entity = repo.entity
    commit = entity.commit(request.params['commit'])
    # `<sha>~1..<sha>`-style two-argument diff; whitespace-only changes
    # are suppressed to keep the rendered diff readable.
    diff_str = entity.git.diff(
        str(commit) + '~1',
        commit,
        ignore_blank_lines=True,
        ignore_space_at_eol=True) if len(commit.parents) > 0 else None
    diff = None
    if diff_str:
        diff = PatchSet(diff_str)
    return Response.render(template, {
        'repo': repo,
        'entity': entity,
        'commit': commit,
        'diff': diff
    })
def _iterate_commits(self, storage: Storage, start_from=0, stop_at=inf):
    """Yield commit objects in [start_from, stop_at), serving cached ones first.

    Commits already persisted in *storage* are replayed from cache (when
    CACHE_THE_DATA is set); the remainder are built from the live repo and
    optionally cached as they are produced.
    """
    query: GithubQuery = self.query
    # Number of leading commits to skip in the live iteration — grows by
    # one for every commit served from cache.
    skip_n = start_from
    if CACHE_THE_DATA:
        for i, commit in enumerate(self.load_commits(storage)):
            if i == 0:
                print("SOME OF THE DATA WERE LOADED FROM CACHE!")
            if not self._in_bounds(i, start_from, stop_at):
                continue
            yield commit
            skip_n += 1
    for i, commit in enumerate(query.repo_iterate_commits()):
        if i < skip_n or not self._in_bounds(i, start_from, stop_at):
            continue
        cobj = self._create_commit(commit)
        # ~~~~~~~~~ save patches mapping ~~~~~~~~~~~~~~~~
        # `git show` doesn't show merges details so here we had to
        # give it a 2-commits diff to show (unless it is the first commit)
        show_str = f"{commit.parents[0]}..{commit}" if commit.parents else commit
        patch_by_files = PatchSet(
            query.repo.git.show(show_str, first_parent=True))
        pfiles: Dict[str, PatchedFile] = {pf.path: pf for pf in patch_by_files}
        # ~~~~~~~~~ convert the stats ~~~~~~~~~~~~~~~~
        for fname in commit.stats.files.keys():
            fc = self._create_file_changeset(fname, pfiles, commit.hexsha)
            cobj.files.append(fc)
        cobj.files.sort()  # place "RENAME" before others
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        if CACHE_THE_DATA:
            storage.save_obj(cobj)
        yield cobj
    ### END
    storage.dispose()
def fetch_changed_files(self):
    """Accumulate changed-file paths from every diff URL on the event."""
    if not getattr(self, "diff_urls", False):
        raise TypeError("The event does not have diff URLs")

    for diff_url in self.diff_urls:
        response = get_with_retries(diff_url, sleep=RETRY_SLEEP)
        response.raise_for_status()
        if "commits" in self.event and self.number == 0:
            # Push-style event: JSON payload with a "files" list.
            payload = response.json()
            if "files" in payload:
                self.changed_files = {f["filename"] for f in payload["files"]}
        else:
            # PR-style event: a raw unified diff body.
            parsed = PatchSet(response.text)
            self.changed_files.update(f.path for f in parsed)

    print(f"Fetched info about {len(self.changed_files)} changed files")
def test_mypy_lint_a_py(app, caplog):
    # mypy on a new file that assigns the result of a None-returning
    # function must report one problem at line 5 of a.py.
    diff = """diff --git a/a.py b/a.py
new file mode 100644
index 0000000..87604af
--- /dev/null
+++ b/a.py
@@ -0,0 +1,5 @@
+def p() -> None:
+    print('hello')
+
+
+a = p()
"""

    context = Context(
        'deepanalyzer/badwolf',
        None,
        'pullrequest',
        'message',
        {'commit': {'hash': '000000'}},
        {'commit': {'hash': '111111'}},
        pr_id=1
    )
    spec = Specification()
    spec.linters.append(ObjectDict(name='mypy', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'mypy'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = (1, 2)
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.line == 5
    assert problem.filename == 'a.py'
def extract_single_line_patches(zip_path):
    """Extract single-line bug-fix examples from a dataset zip archive.

    For every (buggy, fixed) java-file pair in the archive, compute a
    unified diff and keep only pairs whose fix changes exactly one line,
    emitting SingleLineFixExample records.
    """
    logger.info(
        f"Extracting examples from {zip_path}, this might take a while.")
    examples = []
    with zipfile.ZipFile(zip_path) as rawzip:
        # I think the first member is the base dir
        base_path = rawzip.infolist()[0].filename
        info_pairs = get_buggy_pairs(rawzip)
        logger.info(f"Found {len(info_pairs)} buggy java files.")
        logger.info("Filtering single line patches...")
        for buggyinfo, fixedinfo in tqdm(
                info_pairs,
                desc="Progress",
                disable=not logger.isEnabledFor(logging.INFO),
                unit="files",
        ):
            with rawzip.open(buggyinfo) as buggyfile, rawzip.open(
                    fixedinfo) as fixedfile:
                buggy_code = decode(buggyfile.read())
                fixed_code = decode(fixedfile.read())
            # Zero-context diff between the buggy and fixed versions.
            diff = list(
                unified_diff(
                    buggy_code.splitlines(keepends=True),
                    fixed_code.splitlines(keepends=True),
                    fromfile=buggyinfo.filename,
                    tofile=fixedinfo.filename,
                ))
            patch = PatchSet(diff)
            for file_patch in patch:
                if is_single_line(file_patch):
                    fixed_line, lineno = get_fixed_line(file_patch)
                    # Example id = archive path relative to the base dir.
                    id_ = str(
                        PurePosixPath(
                            buggyinfo.filename).relative_to(base_path))
                    examples.append(
                        SingleLineFixExample(id_, buggy_code, fixed_line,
                                             lineno))
    # Hopefully this frees some memory
    del rawzip
    return examples
def run(project,bugid,patch_no,tmp_tracefile='tmp_b'):
    # Instrument the project's own test suite and the patched source
    # methods, run the developer tests on the buggy working copy, and save
    # the coverage trace under ../test_coverage/.
    # NOTE(review): os.system + string concatenation; assumes shell-safe
    # arguments — confirm.
    w_buggy=project+bugid+'b'
    test='randoop'
    tmp_tracefile+=project+bugid+patch_no+'get_test_coverage'
    tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
    # Back up the test directory, then instrument its test cases.
    testdir=os.path.join(w_buggy,get_path_to_test(w_buggy))
    os.system('cp -r '+testdir+' '+testdir+'_bak')
    os.system('make TestCaseInstr ARGS="'+testdir+' '+tmp_tracefile+' '+project+'"')
    print(w_buggy+'_'+patch_no)
    # For every file the patch touches, instrument the method containing
    # the first changed line of each hunk.
    patch = PatchSet.from_filename('../patches/'+patch_no)
    souce_file_list=[]
    for filei in range(len(patch)):
        source_file=patch[filei].source_file
        souce_file_list.append(source_file)
        line_no_list=[]
        for hunki in range(len(patch[filei])):
            for i in range(len(patch[filei][hunki])):
                if not patch[filei][hunki][i].is_context:
                    # Line after the last context line before the change.
                    line_no_list.append(str(patch[filei][hunki][i-1].source_line_no+1))
                    break
        os.system('cp '+source_file+' '+source_file+'.bak')
        os.system('make MthdInstr ARGS="'+source_file+' '+tmp_tracefile+' '+','.join(line_no_list)+'"')
    os.system('defects4j compile -w '+w_buggy)
    if(os.path.exists(tmp_tracefile)):
        os.system('rm '+tmp_tracefile)
    # -n: no recompile, -r: relevant tests only.
    os.system('defects4j test -n -r -w '+w_buggy)
    os.system('mv '+tmp_tracefile+' ../test_coverage/'+w_buggy+'_'+patch_no+'.txt')
    # Restore sources and the test directory, then re-checkout a clean copy.
    for source_file in souce_file_list:
        os.system('rm '+source_file)
        os.system('mv '+source_file+'.bak '+source_file)
    os.system('rm -rf '+testdir)
    os.system('mv '+testdir+'_bak '+testdir)
    os.system('rm -rf '+w_buggy)
    os.system('defects4j checkout -p '+project+' -v '+bugid+'b -w '+project+bugid+'b')
def __init__(self, filename, strict=False):
    """Build a commit-like object from a `git format-patch`-style file.

    Parses the mail header for date/author, the text before the first
    blank line as the header, the rest (up to '---') as the body, and the
    unified diff for the list of modified files.
    """
    self.filename = filename
    diff = PatchSet.from_filename(filename)
    date = None
    author = None
    with open(self.filename, 'r') as f:
        lines = f.read().splitlines()
    # Everything before the '---' separator is the mail portion.
    lines = list(takewhile(lambda line: line != '---', lines))
    for line in lines:
        if line.startswith(DATE_PREFIX):
            date = parse(line[len(DATE_PREFIX):])
        elif line.startswith(FROM_PREFIX):
            author = GitCommit.format_git_author(line[len(FROM_PREFIX):])
    # Header = lines up to the first blank line; body = what follows it.
    header = list(takewhile(lambda line: line != '', lines))
    body = lines[len(header) + 1:]

    modified_files = []
    for f in diff:
        # Strip "a/" and "b/" prefixes
        source = f.source_file[2:]
        target = f.target_file[2:]
        if f.is_added_file:
            t = 'A'
        elif f.is_removed_file:
            t = 'D'
        elif f.is_rename:
            # Consider that renamed files are two operations: the deletion
            # of the original name and the addition of the new one.
            modified_files.append((source, 'D'))
            t = 'A'
        else:
            t = 'M'
        modified_files.append((target, t))
    super().__init__(None, date, author, body, modified_files,
                     strict=strict, commit_to_date_hook=lambda x: date)
def test_bandit_lint_a_py(app, caplog):
    # bandit on a new file with a try/except/pass must report exactly one
    # non-error problem at line 3 of a.py.
    diff = """diff --git a/a.py b/a.py
new file mode 100644
index 0000000..719cd56
--- /dev/null
+++ b/a.py
@@ -0,0 +1,4 @@
+try:
+    a = 1
+except Exception:
+    pass
"""

    context = TestContext('deepanalyzer/badwolf', None, 'pullrequest', 'message', {'commit': {
        'hash': '000000'
    }}, {'commit': {
        'hash': '111111'
    }}, pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='bandit'))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'bandit'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = None
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.py'
    assert problem.line == 3
    assert not problem.is_error
def test_git_renaming(self):
    """A git rename diff parses as one modified file and round-trips exactly."""
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(tests_dir, 'samples/git_rename.diff')
    with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
        parsed = PatchSet(diff_file)

    self.assertEqual(len(parsed), 1)
    renamed = parsed[0]
    self.assertTrue(renamed.is_rename)
    self.assertEqual(renamed.added, 1)
    self.assertEqual(renamed.removed, 1)
    # A rename counts as a modification, not an add/remove.
    self.assertEqual(len(parsed.modified_files), 1)
    self.assertEqual(len(parsed.added_files), 0)
    self.assertEqual(len(parsed.removed_files), 0)
    # check that original diffs and those produced
    # by unidiff are the same
    with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
        self.assertEqual(diff_file.read(), str(parsed))
def test_yamllint_a_yml(app, caplog):
    # yamllint on a new file with a duplicated key must report exactly one
    # problem at line 3 of a.yml.
    diff = """diff --git a/a.yml b/a.yml
new file mode 100644
index 0000000..1eccee8
--- /dev/null
+++ b/a.yml
@@ -0,0 +1,3 @@
+---
+a: 1
+a: 2
"""

    context = TestContext('deepanalyzer/badwolf', None, 'pullrequest', 'message', {'commit': {
        'hash': '000000'
    }}, {'commit': {
        'hash': '111111'
    }}, pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='yamllint', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'yamllint'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = None
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.yml'
    assert problem.line == 3
def test_eslint_lint_a_js(app, pr_context):
    # eslint with a "quotes: single" rule must flag the double-quoted
    # string on line 1 of a.js; the .eslintrc added in the same diff
    # supplies the configuration.
    diff = """diff --git a/.eslintrc b/.eslintrc
new file mode 100644
index 0000000..45e5d69
--- /dev/null
+++ b/.eslintrc
@@ -0,0 +1,5 @@
+{
+  "rules": {
+    "quotes": [2, "single"]
+  }
+}
diff --git a/a.js b/a.js
new file mode 100644
index 0000000..f119a7f
--- /dev/null
+++ b/a.js
@@ -0,0 +1 @@
+console.log("bar")
"""

    spec = Specification()
    spec.linters.append(ObjectDict(name='eslint', pattern=None))
    lint = LintProcessor(pr_context, spec, os.path.join(FIXTURES_PATH, 'eslint'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = (1, 2)
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.js'
    assert problem.line == 1
def check_diff(self):
    """Validate that the PR's diff adds exactly one brand-new file.

    Flags PRs touching more or fewer than one file, or modifying an
    existing file. On success returns {'lines': parsed_new_file,
    'diff': PatchSet}; otherwise returns None after recording the issue.
    """
    diff_file = requests.get(self.pr_info['diff_url'], auth=API_AUTH).text
    diff = PatchSet(diff_file)
    # Count files by counting "diff --git" headers in the raw text.
    fcount = diff_file.count("diff --git")
    if fcount > 1:
        self.add_invalid(
            'More than one file has been added/removed/modified.')
        return
    elif fcount < 1:
        self.add_invalid(
            'Less than one file has been added/removed/modified.')
        return
    elif diff[0].is_modified_file:
        self.add_attention('This file has modifies a pre-existing file.')
        return
    new_file = self.parse_diff(str(diff[0]).split('\n'))
    return {'lines': new_file, 'diff': diff}
def __init__(self):
    """Diff the repo between an age-based prior commit and HEAD, then build updates."""
    self.current_time = int(time.time())
    self.repo = git.Repo(".")
    # Oldest commit considered, determined by COMMIT_AGE.
    self.prior_commit = self._starting_commit(COMMIT_AGE)
    self.current_commit = self.repo.commit("HEAD")
    # Whitespace-only changes are ignored in the diff.
    self.diff_raw = self.repo.git.diff(self.prior_commit,
                                       self.current_commit,
                                       ignore_blank_lines=True,
                                       ignore_space_at_eol=True)
    self.patch = PatchSet(self.diff_raw)
    self.filenames = {}
    # Accumulators filled in by create_updates().
    self.updates = {}
    self.updates["authors"] = []
    self.updates["author_count"] = 0
    self.updates["entities"] = []
    self.updates["pandas"] = []
    self.updates["panda_count"] = 0
    self.updates["photos"] = []
    self.updates["zoos"] = []
    self.updates["zoo_count"] = 0
    self.create_updates()
def is_constitutional(diff_str):
    """Return the constitution-protected file paths that *diff_str* touches.

    NOTE(review): a path can be appended more than once (e.g. a removed
    file whose entry is also protected) — confirm whether callers rely on
    truthiness only, or need de-duplication.
    """
    with open(settings.SITE_ROOT + '/elections/constitution.json') as f:
        constitution = json.load(f)
    patch = PatchSet(diff_str)
    matched_files = []
    for file in patch:
        if file.path in constitution:
            # File removed or renamed
            if file.is_removed_file or (file.source_file.split('/')[-1] != file.target_file.split('/')[-1]):
                matched_files.append(file.path)
            locks = constitution[file.path]
            # Entire file included in constitution
            if locks is None:
                matched_files.append(file.path)
            # File overlaps with at least one protected chunk
            elif _check_hunks(file, locks):
                matched_files.append(file.path)
    return matched_files
def test_jsonlint_a_json_changes_in_range(app, caplog):
    # jsonlint must flag the missing comma introduced by the added line
    # (line 2 of b.json) when the change is within the diff range.
    diff = """diff --git a/b.json b/b.json
index 6ebebfe..6be8d74 100644
--- a/b.json
+++ b/b.json
@@ -1,3 +1,4 @@
 {
   "a": 1
+  "b": 2
 }
"""

    context = Context(
        'deepanalyzer/badwolf',
        None,
        'pullrequest',
        'message',
        {'commit': {'hash': '000000'}},
        {'commit': {'hash': '111111'}},
        pr_id=1
    )
    spec = Specification()
    spec.linters.append(ObjectDict(name='jsonlint', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'jsonlint'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = (1, 2)
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'b.json'
    assert problem.line == 2
def test_shellcheck_a_sh(app, caplog):
    # shellcheck on a new script with an invalid assignment ($foo=42)
    # must report at least one problem at line 2 of a.sh.
    diff = """diff --git a/a.sh b/a.sh
new file mode 100644
index 0000000..9fb9840
--- /dev/null
+++ b/a.sh
@@ -0,0 +2 @@
+#!/bin/sh
+$foo=42
"""

    context = Context('deepanalyzer/badwolf', None, 'pullrequest', 'message', {'commit': {
        'hash': '000000'
    }}, {'commit': {
        'hash': '111111'
    }}, pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='shellcheck', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'shellcheck'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = None
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) > 0
    problem = lint.problems[0]
    assert problem.filename == 'a.sh'
    assert problem.line == 2
def test_sasslint_lint_a_scss(app, caplog):
    # sasslint on a new stylesheet must report three problems in a.scss
    # (quoted color value and related style issues).
    diff = """diff --git a/a.scss b/a.scss
new file mode 100644
index 0000000..48b3ebe
--- /dev/null
+++ b/a.scss
@@ -0,0 +1,3 @@
+.test {
+  background-color: "#FFF"
+}
"""

    context = Context('deepanalyzer/badwolf', None, 'pullrequest', 'message', {'commit': {
        'hash': '000000'
    }}, {'commit': {
        'hash': '111111'
    }}, pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='sasslint', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'sasslint'))
    patch = PatchSet(diff.split('\n'))
    # Stub out VCS/reporting side effects so only linting runs.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = None
        lint.problems.set_changes(patch)
        lint.process()
        assert load_changes.called

    assert len(lint.problems) == 3
    problem = lint.problems[0]
    assert problem.filename == 'a.scss'
def predict_failing_tests(path, predictor, encoder, test_ids_to_test_names):
    """Predict which tests will fail for the uncommitted changes in *path*.

    :param path: path of a git working tree to diff against HEAD
    :param predictor: fitted model exposing ``predict(DataFrame)``
    :param encoder: fitted transformer for the columns named by the
        module-level ``encoded_column_names``
    :param test_ids_to_test_names: mapping (e.g. Series/DataFrame) whose
        ``.index`` holds all known test ids
    :return: index of test ids whose aggregated prediction is False
        (i.e. predicted failing -- at least one mutant predicts failure)
    """
    # Diff the working tree against HEAD with zero context lines so every
    # hunk line is a real change.
    repo = Repo(path)
    diff = repo.git.diff(repo.head, None, "--unified=0")
    patchset = PatchSet(diff)

    # One "mutant" record per removed source line.
    mutants = []
    for patched_file in patchset:
        for hunk in patched_file:
            for line in hunk:
                if line.is_removed:
                    mutants.append({
                        # the [2:] removes the "b/" (or "a/") diff prefix
                        "modified_file_path": patched_file.target_file[2:],
                        # BUGFIX: the original kept a hand-rolled counter that
                        # was incremented for added lines too and was already
                        # one past the real position on the first line; unidiff
                        # tracks the true source line number itself.
                        "line_number_changed": line.source_line_no,
                    })

    # Cross every mutant with every known test id.
    test_ids = test_ids_to_test_names.index
    rows = []
    for mutant in mutants:
        for test_id in test_ids:
            mutant["test_id"] = test_id
            rows.append(mutant.copy())

    features = DataFrame(rows)
    # (a stray no-op expression statement naming the frame was removed here)
    features[encoded_column_names] = encoder.transform(
        features[encoded_column_names])
    features["prediction"] = predictor.predict(features)

    # A test id survives .all() as True only if every one of its mutants
    # predicted True; we return the ids where that aggregate is False.
    prediction_per_test_id = features.groupby("test_id").all()["prediction"]
    return prediction_per_test_id[~prediction_per_test_id].index
def main(commit_sha, comments_url, github_token):
    """Post one GitHub review comment per hunk of the unstaged diff."""
    repo = git.Repo()
    diff_text = repo.git.diff(unified=0)
    patch_set = PatchSet(StringIO(diff_text))

    # Collect one comment payload per hunk across all changed files.
    hunks = []
    for changed_file in patch_set:
        path = changed_file.path
        for hunk in changed_file:
            # line 0 is not accepted
            first_line = hunk.source_start or 1
            hunks.append({
                "start_line": first_line,
                "line": first_line + hunk.source_length - 1,
                "body": comment_body(hunk),
                "path": path,
                "commit_id": commit_sha,
                "side": "RIGHT",
                "start_side": "RIGHT",
            })

    for payload in hunks:
        headers = {
            "Authorization": f"token {github_token}",
            "Content-Type": "application/json",
            "Accept": "application/vnd.github.comfort-fade-preview+json",
        }
        if payload["start_line"] >= payload["line"]:
            # Single line comment
            headers.pop("Accept")
            payload["line"] = payload.pop("start_line")
        print(f"Making request: {payload}")
        response = requests.post(comments_url, json=payload, headers=headers)
        if response.status_code >= 400:
            print(f"Error: {response.content.decode()}")
def update_copyright(data):
    """Prepend a dated "Update copyright years." entry to each ChangeLog
    affected by the given diff.

    :param data: unified-diff text (or lines) parseable by ``unidiff.PatchSet``
    :raises subprocess.CalledProcessError: if git config lookup fails
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # Argument-list form instead of shell=True: same output, no shell parsing.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)
    for file in diff:
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        # Touch each ChangeLog at most once even if several files map to it.
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        # Rewrite the file with the new entry prepended.
        with open(changelog, 'w+') as f:
            f.write(f'{current_timestamp} {username} <{email}>\n\n')
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
def main():
    """Extract before/after copies of every file touched by a set of patches.

    argv[1]: base source tree the patches apply to (patches are assumed
             already applied; they are reverted one by one with git apply -R)
    argv[2]: directory of patch files, processed in reverse-sorted order
    argv[3]: output directory; receives the file copies plus index.txt
             mapping each before-file to its after-file
    """
    base = sys.argv[1]
    patches = sys.argv[2]
    output = sys.argv[3]
    counter = 1
    index = []
    files = os.listdir(patches)
    pc = 0
    for pfile in sorted(files, reverse=True):
        # Progress counter on a single line; flush because there is no newline.
        sys.stdout.write("\r%d" % pc)
        sys.stdout.flush()
        pc += 1
        pfile = os.path.join(patches, pfile)
        try:
            patch = PatchSet.from_filename(pfile)
        # BUGFIX: was a bare "except:", which also swallowed
        # KeyboardInterrupt/SystemExit; skip only ordinary parse failures.
        except Exception:
            continue
        base_counter = counter
        # Pass 1: copy the current ("after") state of each modified file.
        for f in patch.modified_files:
            fn = "%05d_after_%s" % (counter, f.path.replace("/", "_"))
            srcf = os.path.join(base, f.path)
            if os.path.exists(srcf):
                index.append([fn])
                copyfile(srcf, os.path.join(output, fn))
                counter += 1
        # Revert this patch in the base tree, then re-walk the same files.
        call(["git", "apply", "-R", "--reject", pfile], cwd=base)
        counter = base_counter
        # Pass 2: copy the reverted ("before") state next to each after-copy.
        for f in patch.modified_files:
            fn = "%05d_before_%s" % (counter, f.path.replace("/", "_"))
            srcf = os.path.join(base, f.path)
            if os.path.exists(srcf):
                # Counters advance in lockstep with index appends, so the
                # matching after-entry lives at position counter - 1.
                index[counter - 1].insert(0, fn)
                copyfile(srcf, os.path.join(output, fn))
                counter += 1
    with open(os.path.join(output, "index.txt"), "w") as fout:
        for p in index:
            fout.write("%s %s\n" % tuple(p))
    print()
import os
import json

from unidiff import PatchSet

# Script: write an INFO/<patch>.json metadata record for every parseable
# patch file in the current directory, tagging it with its Defects4J
# project and bug id.

projects = ['Chart', 'Time', 'Lang', 'Closure', 'Math', 'Mockito']

for patch_file in os.listdir('.'):
    info = {}
    try:
        patch = PatchSet.from_filename(patch_file)
        target_file = patch[0].source_file
    # BUGFIX: was a bare "except:" (also caught KeyboardInterrupt);
    # skip only entries that fail to parse or have no files.
    except Exception:
        continue
    # First path component encodes the checkout name, e.g. "Chart1b/...".
    s = target_file.split('/')[0]
    info['ID'] = patch_file
    s = s.split('_')[0]
    info['tool'] = 'SimGen'
    info['correctness'] = 'Incorrect'
    for p in projects:
        if p in s:
            info['project'] = p
            # Strip the project prefix and the trailing 'b' marker,
            # leaving the numeric bug id.
            info['bug_id'] = s[len(p):-1]
    # BUGFIX: use a context manager so the file is closed even if
    # json.dump raises.
    with open('INFO/%s.json' % patch_file, 'w') as f:
        json.dump(info, f)
def run(project,bugid,patch_no,tests,tmp_tracefile='tmp_c'):
    """Run *tests* against the buggy and patched Defects4J checkouts and
    collect instrumentation traces into ../traces/<buggy>_<patch_no>/.

    :param project: Defects4J project name, e.g. 'Time' or 'Chart'
    :param bugid: bug id used to build the checkout directory names
    :param patch_no: patch file name under ../patches; also names the
        output trace directory
    :param tests: iterable of 'pkg.Class::method' test identifiers
    :param tmp_tracefile: prefix for the temporary trace file the
        instrumented classes write to

    NOTE(review): relies on os.system with string-concatenated shell
    commands and make targets ('instru_class', 'GetSingleTest_Chart')
    defined outside this file; failures of the shell commands are not
    checked except for the test-run status.
    """
    # Make the temp trace file unique to this (project, bug, patch) run.
    tmp_tracefile+=project+bugid+patch_no+'run_print_trace'
    tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
    # Working-copy directory names, e.g. Time1b and Time1b_<patch_no>.
    w_buggy=project+str(bugid)+'b'
    w_patched=w_buggy+'_'+patch_no
    patchfile=os.path.join('../patches',patch_no)
    patch = PatchSet.from_filename(patchfile)
    source_file=patch[0].source_file
    target_file=patch[0].target_file
    # Back up the files the patch touches before instrumenting them;
    # they are restored at the end of this function.
    os.system('cp '+source_file+' '+source_file+'.bak')
    os.system('cp '+target_file+' '+target_file+'.bak')
    # Instrument the buggy source, then the patched variant (the -P/-F
    # form presumably applies the patch before instrumenting -- defined
    # by the external make target; confirm there).
    os.system('make instru_class ARGS="-S '+source_file+' -T '+tmp_tracefile+'"')
    os.system('make instru_class ARGS="-S '+target_file+ ' -T '+tmp_tracefile+' '+ ' -P '+patchfile+ ' -F '+target_file+'"')
    #
    dir_path='../traces/'+w_patched
    # Start from a clean trace file and fresh output directories.
    if(os.path.exists(tmp_tracefile)):
        os.system('rm '+tmp_tracefile)
    os.system('mkdir '+dir_path)
    os.system('mkdir '+os.path.join(dir_path,'buggy'))
    os.system('mkdir '+os.path.join(dir_path,'patched'))
    if project=='Time':
        # NOTE(review): only Time is compiled up front here -- presumably
        # the per-test make path below handles compilation for the other
        # projects; confirm against the Makefile.
        os.system('defects4j compile -w '+w_buggy)
        os.system('defects4j compile -w '+w_patched)
    # clone
    for test in tests:
        test=test.strip()
        # Path of the JUnit source file for this test inside the buggy copy.
        testfile=os.path.join(w_buggy,get_path_to_test(w_buggy),test.split('::')[0].replace('.','/')+'.java')
        if project=='Time':
            os.system('rm '+tmp_tracefile)
            status=os.system('timeout 90 defects4j test -t '+test+' -w '+w_buggy)
        else:
            # Temporarily rewrite the test file so only the selected test
            # method runs, then restore the original afterwards.
            os.system('cp '+testfile+' '+testfile+'.bak')
            os.system('make GetSingleTest_Chart ARGS="'+testfile+' '+test.split('::')[1]+'"')
            status=os.system('timeout 90 defects4j test -t '+test+' -w '+w_buggy)
            os.system('mv '+testfile+'.bak '+testfile)
            print(testfile)
        if status==0:
            # Keep the trace only when the run finished within the timeout.
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'buggy','__'.join(test.split('::'))))
        # Same procedure against the patched working copy.
        testfile=os.path.join(w_patched,get_path_to_test(w_patched),test.split('::')[0].replace('.','/')+'.java')
        if project=='Time':
            os.system('rm '+tmp_tracefile)
            status=os.system('timeout 90 defects4j test -t '+test+' -w '+w_patched)
        else:
            os.system('cp '+testfile+' '+testfile+'.bak')
            os.system('make GetSingleTest_Chart ARGS="'+testfile+' '+test.split('::')[1]+'"')
            status=os.system('timeout 90 defects4j test -t '+test+' -w '+w_patched)
            os.system('mv '+testfile+'.bak '+testfile)
        if status==0:
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'patched','__'.join(test.split('::'))))
    # clone
    # Restore the original (un-instrumented) source files from the backups.
    os.system('mv '+source_file+'.bak '+source_file)
    os.system('mv '+target_file+'.bak '+target_file)