Esempio n. 1
0
def get_latest_patchset():
    """Build a PatchSet from the diff of the most recent commit.

    Runs ``git diff -U0 @~..@`` (last commit vs. its parent, no context
    lines) and parses its output; CRLF endings are normalised to LF first.
    """
    raw_diff = check_output(['git', 'diff', '-U0', '@~..@'])
    normalized = raw_diff.replace(b'\r\n', b'\n')
    return PatchSet.from_string(normalized, encoding='utf-8')
    def scan_diff(
        self,
        diff,
        baseline_filename='',
        last_commit_hash='',
        repo_name='',
    ):
        """For optimization purposes, our scanning strategy focuses on looking
        at incremental differences, rather than re-scanning the codebase every time.
        This function supports this, and adds information to self.data.

        :type diff: str
        :param diff: diff string.
                     Eg. The output of `git diff <fileA> <fileB>`

        :type baseline_filename: str
        :param baseline_filename: if there are any baseline secrets, then the baseline
                                  file will have hashes in them. By specifying it, we
                                  can skip this clear exception.

        :type last_commit_hash: str
        :param last_commit_hash: used for logging only -- the last commit hash we saved

        :type repo_name: str
        :param repo_name: used for logging only -- the name of the repo
        """
        try:
            patch_set = PatchSet.from_string(diff)
        except UnidiffParseError:  # pragma: no cover
            # Log enough context (commit hash + repo) to find the bad diff,
            # then propagate the parse failure to the caller.
            alert = {
                'alert': 'UnidiffParseError',
                'hash': last_commit_hash,
                'repo_name': repo_name,
            }
            log.error(alert)
            raise

        # Compile the exclusion pattern once, outside the per-file loop.
        if self.exclude_regex:
            regex = re.compile(self.exclude_regex, re.IGNORECASE)

        for patch_file in patch_set:
            filename = patch_file.path
            # If the file matches the exclude_regex, we skip it
            if self.exclude_regex and regex.search(filename):
                continue

            # The baseline file holds secret hashes and would always trigger.
            if filename == baseline_filename:
                continue

            # Feed each plugin's result set with secrets found in this file's diff.
            for results, plugin in self._results_accumulator(filename):
                results.update(
                    self._extract_secrets_from_patch(
                        patch_file,
                        plugin,
                        filename,
                    ),
                )
Esempio n. 3
0
    def test_patchset_from_bytes_string(self):
        """Parsing raw bytes must yield the same PatchSet as parsing text."""
        # Read the sample diff as raw bytes and decode during parsing.
        with codecs.open(self.sample_file, 'rb') as raw:
            from_bytes = PatchSet.from_string(raw.read(), encoding='utf-8')

        # Parse the same file again through a text-mode stream.
        with codecs.open(self.sample_file, 'r', encoding='utf-8') as text:
            from_text = PatchSet(text)

        self.assertEqual(from_bytes, from_text)
Esempio n. 4
0
 def get_commit_filechanges(self, repo, sha):
     """Fetch the unidiff for commit *sha* of *repo* and return it transformed.

     The diff endpoint returns plain text, so the response is requested with
     ``allow_text=True`` and parsed into a PatchSet before transformation.
     """
     endpoint = BitbucketAPIPath.repository_diff.format(repo=repo, spec=sha)
     response = self.get(endpoint, allow_text=True)
     patchset = PatchSet.from_string(response.text)
     return self.transform_patchset(patchset)
def run(project,bugid,patch_no,randoop_tests,tmp_tracefile='tmp_d'):
    """Trace Randoop test executions on the buggy and patched Defects4J checkouts.

    Instruments the patch's source and target files, runs every test case in
    *randoop_tests* against both working copies, and moves each produced trace
    file into ``../traces/<project><bugid>b_<patch_no>/{buggy,patched}/``.

    :param project: Defects4J project name.
    :param bugid: bug identifier within the project.
    :param patch_no: patch file name under ``../patches``.
    :param randoop_tests: iterable of Randoop test-case names.
    :param tmp_tracefile: prefix for the temporary trace file name.
    """
    # Build a per-(project, bug, patch) temp trace path to avoid collisions.
    tmp_tracefile+=project+bugid+patch_no+'run_trace_randoop.py'
    tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
    w_buggy=project+str(bugid)+'b'
    w_patched=w_buggy+'_'+patch_no
    # The patch identifies which source/target files need instrumentation.
    patchfile=os.path.join('../patches',patch_no)
    patch = PatchSet.from_filename(patchfile)
    source_file=patch[0].source_file
    target_file=patch[0].target_file

    # Back up both files before the make targets instrument them in place.
    os.system('cp '+source_file+' '+source_file+'.bak')
    os.system('cp '+target_file+' '+target_file+'.bak')

    os.system('make instru_class ARGS="-S '+source_file+' -T '+tmp_tracefile+'"')
    os.system('make instru_class ARGS="-S '+target_file+
            ' -T '+tmp_tracefile+' '+
            ' -P '+patchfile+
            ' -F '+target_file+'"')
    # Prepare a clean trace output directory for this patch.
    dir_path='../traces/'+w_patched
    if(os.path.exists(tmp_tracefile)):
        os.system('rm '+tmp_tracefile)
    os.system('mkdir '+dir_path)
    os.system('mkdir '+os.path.join(dir_path,'buggy'))
    os.system('mkdir '+os.path.join(dir_path,'patched'))

    test='randoop'
    # Archive holding the generated Randoop suite for this bug.
    testfile='../test_gen_randoop/'+project+'/randoop/'+bugid+'/'+project+'-'+bugid+'b-randoop.'+bugid+'.tar.bz2'

    # The first run compiles the suite; later runs pass -n to skip compilation.
    comp_flag=True
    for Test_Case in randoop_tests:
        test='Randoop.'+Test_Case.strip()
        if comp_flag:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -w '+w_buggy)
        else:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -n -w '+w_buggy)
        if status==0:
            # Only keep the trace when the test run succeeded.
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'buggy','__'.join(test.split('::'))))

        if comp_flag:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -w '+w_patched)
        else:
            status=os.system('timeout 90 defects4j test -s '+testfile+' -t '+Test_Case.strip()+' -n -w '+w_patched)
        if status==0:
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'patched','__'.join(test.split('::'))))
        comp_flag=False

    # Restore the original (un-instrumented) files.
    os.system('mv '+source_file+'.bak '+source_file)
    os.system('mv '+target_file+'.bak '+target_file)
Esempio n. 6
0
def main():
    """CLI entry point: find staged Template.xml files whose diff touches only
    auto-generated tags, and un-stage (reset) them.

    The scan only runs when ``--g`` is passed.
    """
    parser = argparse.ArgumentParser(description='Утилита для проверки ошибочно изменных файлов в индексе')
    parser.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__))
    parser.add_argument('-v', '--verbose', dest='verbose_count', action='count', default=0,
                        help='Increases log verbosity for each occurence.')
    parser.add_argument('--g', action='store_true', default=False,
                        help='Запустить чтение индекса из git и определить список файлов для разбора')

    args = parser.parse_args()

    # Each -v lowers the threshold by one logging level (10 units).
    log.setLevel(max(3 - args.verbose_count, 0) * 10)

    # Tags that change automatically; a diff consisting solely of these
    # lines is considered spurious and safe to reset.
    taglistchange = ('<d3p1:id>', '<d3p1:fullIntervalBegin>',
                     '<d3p1:fullIntervalEnd>', '<d3p1:visualBegin>',
                     '<xr:TypeId>',
                     '<xr:ValueId>'
                     )

    if args.g is True:
        files = get_list_of_comitted_files()
        for file in files:
            # Only *Template.xml files are candidates.
            if not file[-12:] == "Template.xml":
                continue

            data = get_diff_forfile(file)
            if data is None:
                log.error("diff file not exists {}".format(file))
                continue
            # NOTE(review): 'data' comes from get_diff_forfile() but is passed
            # to from_filename(), which expects a path -- confirm that helper
            # returns a file name rather than diff text.
            pathc = PatchSet.from_filename(data, encoding='utf-8')
            for f in pathc.modified_files:
                log.debug('file is {}'.format(f))
                modifiedsource, modifiedtarget = [],[]
                # Collect only changed lines (those not starting with a space).
                for hunk in f:
                    modifiedsource = modifiedsource + list(filter(lambda x: not x[:1] == " ", hunk.source))
                    modifiedtarget = modifiedtarget + list(filter(lambda x: not x[:1] == " ", hunk.target))


                # Changed lines that start (after the +/- marker) with one of
                # the auto-generated tags.
                sourcetags = list(filter(lambda x: x[1:].strip().startswith(taglistchange), modifiedsource))
                targettags = list(filter(lambda x: x[1:].strip().startswith(taglistchange), modifiedtarget))
                log.debug(sourcetags)
                log.debug(targettags)

                # If any changed line is NOT an auto-generated tag, keep the file staged.
                if not (len(sourcetags) == len(modifiedsource) and \
                    len(targettags) == len(modifiedtarget) and \
                    len(sourcetags) == len(targettags)):
                    continue

                # Now undo the staged (index) changes for this file.
                git_reset_file(file, 'HEAD')
                break
        replace_old_form_attr(files)
def run(project,bugid,patch_no,tmp_tracefile='tmp_a'):
        """Measure Randoop test-suite coverage of the methods touched by a patch.

        Unpacks and instruments the generated Randoop suite, instruments every
        source file referenced by ``../patches/<patch_no>``, runs the suite via
        ``defects4j test``, and stores the trace under ``../randoop_cover/``.
        The working checkout is recreated at the end.
        """
        # Per-(project, bug, patch) temp trace path to avoid collisions.
        tmp_tracefile+=project+bugid+patch_no+'get_randoop_coverage'
        tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
        w_buggy=project+bugid+'b'
        test='randoop'
        # Unpack the generated Randoop suite, instrument its test cases, and
        # repack it as the .instr archive used below.
        testfiledir='../test_gen_randoop/'+project+'/randoop/'+bugid+'/'
        targetfile=testfiledir+project+'-'+bugid+'b-randoop.'+bugid+'.instr.tar.bz2'
        testfile=testfiledir+project+'-'+bugid+'b-randoop.'+bugid+'.tar.bz2'
        targetdir=testfiledir+'suite'
        os.system('mkdir '+targetdir)
        os.system('tar xvf '+testfile+' -C '+targetdir)
        os.system('make TestCaseInstr ARGS="'+targetdir+' '+tmp_tracefile+' Randoop"')
        os.system('cd '+targetdir+' && tar -c ./* | bzip2 > ../'+project+'-'+bugid+'b-randoop.'+bugid+'.instr.tar.bz2')

        print(w_buggy+'_'+patch_no)
        patch = PatchSet.from_filename('../patches/'+patch_no)
        souce_file_list=[]
        for filei in range(len(patch)):
            source_file=patch[filei].source_file
            souce_file_list.append(source_file)
            line_no_list=[]
            # For each hunk, record the first modified source line number
            # (the line after the last leading context line).
            for hunki in range(len(patch[filei])):
                for i in range(len(patch[filei][hunki])):
                    if not patch[filei][hunki][i].is_context:
                        line_no_list.append(str(patch[filei][hunki][i-1].source_line_no+1))
                        break
            # Back up, then instrument the methods containing those lines.
            os.system('cp '+source_file+' '+source_file+'.bak')
            os.system('make MthdInstr ARGS="'+source_file+' '+tmp_tracefile+' '+','.join(line_no_list)+'"')
        # Start from a clean trace file, run the suite, and archive the trace.
        if(os.path.exists(tmp_tracefile)):
            os.system('rm '+tmp_tracefile)
        os.system('defects4j test -s '+targetfile+' -w '+w_buggy)
        os.system('mv '+tmp_tracefile+' ../randoop_cover/'+w_buggy+'_'+patch_no+'.txt')
        # Restore the instrumented sources and rebuild a clean checkout.
        for source_file in souce_file_list:
            os.system('rm '+source_file)
            os.system('mv '+source_file+'.bak '+source_file)
        os.system('rm -rf '+w_buggy)
        os.system('defects4j checkout -p '+project+' -v '+bugid+'b -w '+project+bugid+'b')
Esempio n. 8
0
    def get_commit_filechanges(self, repo, sha):
        """Return the transformed file changes for commit *sha* of *repo*.

        The Bitbucket diff endpoint responds with a unidiff document, so the
        request allows a text body; the text is parsed into a PatchSet and
        handed to ``transform_patchset``.
        """
        url = '/2.0/repositories/{}/diff/{}'.format(repo, sha)
        response = self.get(url, allow_text=True)
        patchset = PatchSet.from_string(response.text)
        return self.transform_patchset(patchset)
Esempio n. 9
0
    def get_vulnerable_lines(dir):
        """Map each patched file to the source line numbers its patch removes.

        Reads ``<dir>/patch/file.patch`` and, for every file it touches,
        records the absolute path under ``<dir>/app`` together with the line
        numbers of all removed lines (the vulnerable lines).
        """
        patch = PatchSet.from_filename(join(dir, 'patch/file.patch'),
                                       encoding='latin-1')

        flaw_dict = {}
        for patched_file in patch:
            target = abspath(join(dir, 'app', patched_file.path))
            flaw_dict[target] = [
                line.source_line_no
                for hunk in patched_file
                for line in hunk
                if line.is_removed
            ]
        return flaw_dict
Esempio n. 10
0
    def test_parse_sample(self):
        """Parse sample file and verify per-hunk, per-file and total counts."""
        with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
            res = PatchSet(diff_file)

        # three files in the patch
        self.assertEqual(len(res), 3)
        # three hunks in the first file
        self.assertEqual(len(res[0]), 3)

        # first file is modified
        self.assertTrue(res[0].is_modified_file)
        self.assertFalse(res[0].is_removed_file)
        self.assertFalse(res[0].is_added_file)

        # Hunk 1: six additions, no deletions, a section header
        self.assertEqual(res[0][0].added, 6)
        self.assertEqual(res[0][0].removed, 0)
        self.assertEqual(res[0][0].section_header, 'Section Header')

        # Hunk 2: 2 additions, 8 deletions, no section header
        self.assertEqual(res[0][1].added, 2)
        self.assertEqual(res[0][1].removed, 8)
        self.assertEqual(res[0][1].section_header, '')

        # Hunk 3: four additions, no deletions, no section header
        self.assertEqual(res[0][2].added, 4)
        self.assertEqual(res[0][2].removed, 0)
        self.assertEqual(res[0][2].section_header, '')

        # Check file totals (sum over the three hunks above)
        self.assertEqual(res[0].added, 12)
        self.assertEqual(res[0].removed, 8)

        # second file is added
        self.assertFalse(res[1].is_modified_file)
        self.assertFalse(res[1].is_removed_file)
        self.assertTrue(res[1].is_added_file)

        # third file is removed
        self.assertFalse(res[2].is_modified_file)
        self.assertTrue(res[2].is_removed_file)
        self.assertFalse(res[2].is_added_file)

        # Patch-set-wide totals across all three files
        self.assertEqual(res.added, 21)
        self.assertEqual(res.removed, 17)
Esempio n. 11
0
    def fetch_changed_files(self):
        """Populate ``self.changed_files`` from the event's diff URL.

        Push events ("commits" present in the event and number == 0) are
        served as JSON; anything else is served as a unidiff document.
        """
        if not self.diff_url:
            raise Exception("Diff URL cannot be find for event")

        response = get_with_retries(
            self.diff_url,
            sleep=RETRY_SLEEP,
        )
        response.raise_for_status()
        if "commits" in self.event and self.number == 0:
            diff = response.json()

            if "files" in diff:
                self.changed_files = [f["filename"] for f in diff["files"]]
        else:
            # NOTE(review): this branch stores a set while the JSON branch
            # stores a list -- confirm callers only iterate / len() the result.
            diff_object = PatchSet(response.text)
            self.changed_files = {f.path for f in diff_object}
def main():
    """Run the coccinelle scripts over the C files touched by a commit range
    and report violations that land on lines added by that range.

    Returns the number of violations found (suitable as an exit status).
    """
    args = parse_args()
    if not args.commits:
        exit("missing commit range")

    # pylint does not like the 'sh' library
    # pylint: disable=too-many-function-args,unexpected-keyword-arg
    commit = sh.git("diff", args.commits, **sh_special_args)
    patch_set = PatchSet(commit)
    zephyr_base = os.getenv("ZEPHYR_BASE")
    violations = {}
    numViolations = 0

    for f in patch_set:
        # Only .c/.h files that still exist in the tree are checked.
        if not f.path.endswith(".c") and not f.path.endswith(".h") or not os.path.exists(zephyr_base + "/" + f.path):
            continue

        # Collect candidate violations reported by each coccinelle script.
        for script in coccinelle_scripts:
            script_path = os.getenv("ZEPHYR_BASE") + "/" + script
            cocci = sh.coccicheck(
                "--mode=report",
                "--cocci=" +
                script_path,
                f.path,
                **sh_special_args)
            parse_coccinelle(cocci, violations)

        # Only violations on lines added by this commit range are counted.
        for hunk in f:
            for line in hunk:
                if line.is_added:
                    violation = "{}:{}".format(f.path, line.target_line_no)
                    if violation in violations:
                        numViolations += 1
                        if args.output:
                            with open(args.output, "a+") as fp:
                                fp.write("{}:{}\n".format(
                                    violation, "\t\n".join(
                                        violations[violation])))
                        else:
                            print(
                                "{}:{}".format(
                                    violation, "\t\n".join(
                                        violations[violation])))

    return numViolations
def get_patched_class(patch_no):
    """Return the PatchInfo result (presumably the patched class name --
    confirm against the PatchInfo make target) for patch *patch_no*.

    Finds the first modified source line of every hunk in the patch's first
    file, passes those line numbers to ``make PatchInfo``, and reads the
    answer back from a temporary result file.
    """
    patchfile=os.path.join('../../patches',patch_no)
    patch = PatchSet.from_filename(patchfile,encoding='utf-8')
    source_file=patch[0].source_file
    line_no_list=[]
    tmp_file='tmp_result'+patch_no
    # First non-context line of each hunk -> line after the last context line.
    for hunki in range(len(patch[0])):
        for i in range(len(patch[0][hunki])):
            if not patch[0][hunki][i].is_context:
                line_no_list.append(str(patch[0][hunki][i-1].source_line_no+1))
                break
    os.system('cd .. && make PatchInfo ARGS="'+os.path.join('../source/',source_file)+' '+tmp_file+' '+','.join(line_no_list)+'" >/dev/null')
    # The make target writes its single-line answer into tmp_file.
    f=open('../'+tmp_file)
    res=f.readlines()[0].strip()
    f.close()
    os.system('rm ../'+tmp_file)
    return res
Esempio n. 14
0
    def _get_added(cls, diff):
        """Collect the added lines of every file in *diff*.

        Files with no additions are skipped.  For each remaining file a
        DiffContent is built holding one Line per added diff line (target
        line number, line text, and 1-based position within its hunk).
        """
        collected = []
        for patched_file in PatchSet(StringIO(diff)):
            if not patched_file.added > 0:
                continue
            added_lines = [
                Line(line.target_line_no, line.value, position + 1)
                for hunk in patched_file
                for position, line in enumerate(hunk)
                if line.is_added
            ]
            collected.append(DiffContent(patched_file.path, added_lines))

        return collected
Esempio n. 15
0
    def test_samples(self):
        """Each sample diff parses into exactly one added, one removed and one
        modified file, and serialising the PatchSet round-trips the input."""
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        for fname in self.samples:
            file_path = os.path.join(tests_dir, 'samples', fname)
            with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
                res = PatchSet(diff_file)

            # 3 files updated by diff
            self.assertEqual(len(res), 3)

            # 1 added file
            added_files = res.added_files
            self.assertEqual(len(added_files), 1)
            self.assertEqual(added_files[0].path, 'added_file')
            # 1 hunk, 4 lines
            self.assertEqual(len(added_files[0]), 1)
            self.assertEqual(added_files[0].added, 4)
            self.assertEqual(added_files[0].removed, 0)

            # 1 removed file
            removed_files = res.removed_files
            self.assertEqual(len(removed_files), 1)
            self.assertEqual(removed_files[0].path, 'removed_file')
            # 1 hunk, 3 removed lines
            self.assertEqual(len(removed_files[0]), 1)
            self.assertEqual(removed_files[0].added, 0)
            self.assertEqual(removed_files[0].removed, 3)

            # 1 modified file
            modified_files = res.modified_files
            self.assertEqual(len(modified_files), 1)
            self.assertEqual(modified_files[0].path, 'modified_file')
            # 1 hunk, 3 added lines, 1 removed line
            self.assertEqual(len(modified_files[0]), 1)
            self.assertEqual(modified_files[0].added, 3)
            self.assertEqual(modified_files[0].removed, 1)

            self.assertEqual(res.added, 7)
            self.assertEqual(res.removed, 4)

            # check that original diffs and those produced
            # by unidiff are the same
            with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
                self.assertEqual(diff_file.read(), str(res))
Esempio n. 16
0
def parse_diff(lines):
    """Split raw commit-log output into its commit hash and parsed patch.

    Returns a 4-tuple ``(commit_hash, is_valid_sig, is_merge, patch)``.
    NOTE(review): is_merge and is_valid_sig are never updated here and are
    always returned as False -- confirm whether their detection was left
    out intentionally.
    """
    is_merge = False
    is_valid_sig = False
    commit_hash = None

    collected = []
    for raw_line in lines:
        # Remember the most recent "commit <hash>" header.
        if raw_line.startswith("commit"):
            _, commit_hash = raw_line.split()

        if collected:
            # Already inside the diff: keep accumulating every following line.
            collected.append(raw_line)
        elif raw_line.startswith('diff'):
            # The first 'diff' header starts the accumulation.
            collected = [raw_line]

    patch = PatchSet(collected)
    return commit_hash, is_valid_sig, is_merge, patch
Esempio n. 17
0
def test_flake8_lint_a_py(app, caplog):
    """flake8 linting of a newly added a.py reports exactly one problem,
    located on line 6 of the file (the `return a+ b` statement)."""
    diff = """diff --git a/a.py b/a.py
new file mode 100644
index 0000000..fdeea15
--- /dev/null
+++ b/a.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+
+def add(a, b):
+    return a+ b
"""

    context = TestContext('deepanalyzer/badwolf',
                          None,
                          'pullrequest',
                          'message', {'commit': {
                              'hash': '000000'
                          }}, {'commit': {
                              'hash': '111111'
                          }},
                          pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='flake8', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'flake8'))
    patch = PatchSet(diff.split('\n'))
    # Stub out I/O-heavy steps so process() only runs the linter itself.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = None
        lint.problems.set_changes(patch)
        lint.process()

        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.py'
    assert problem.line == 6
Esempio n. 18
0
def create_annotations_from_patch(patch):
    """Convert a unified-diff patch (e.g. `git diff` output) into a list of
    Annotation objects, one per contiguous section of added content."""

    annotations = list()
    try:
        pset = PatchSet(patch)
        for mod_files in pset.modified_files:
            # Walk every hunk of each modified file, building annotations.

            for hunk in mod_files:
                a = Annotation(mod_files.path, hunk.source_start,
                               hunk.target_start)
                offset = 0
                for line in hunk.target:
                    if line[0] == '+':
                        if a.start_new_annotation():
                            # A single hunk can contain multiple annotations: we
                            # finish the current one and start a fresh Annotation
                            # for the next section of added content.
                            annotations.append(_post_process_annotation(a))
                            b = Annotation(mod_files.path, a.source_start,
                                           a.target_start)
                            # copy lines from the previous annotation for context
                            [b.addContext(item, False) for item in a.context]
                            a = b
                        a.addContext([
                            hunk.target_start + offset,
                            format(line[1:].rstrip('\n'))
                        ], True)
                    elif line[0] not in ['+', '-']:
                        # Context line: record it without marking it as added.
                        a.addContext([
                            hunk.source_start + offset,
                            format(line[1:].rstrip('\n'))
                        ], False)
                    # Keep track of the line offset within the hunk; removed
                    # lines do not advance the target-side position.
                    if line[0] != '-':
                        offset += 1
                annotations.append(_post_process_annotation(a))
    except Exception:
        # Any parse/processing failure aborts the program with exit code 2.
        traceback.print_exc(file=sys.stdout)
        sys.exit(2)
    return annotations
Esempio n. 19
0
 def commit(self, request):
     """Render the commit-detail page, including the whitespace-insensitive
     diff against the commit's first parent (root commits get no diff)."""
     template = self.view.get_template("repo/commit.html")
     repo = Repo.retrieve(name=request.params['repo'])
     entity = repo.entity
     commit = entity.commit(request.params['commit'])
     # Root commits have no parent to diff against, so diff_str stays None.
     diff_str = entity.git.diff(
         str(commit) + '~1',
         commit,
         ignore_blank_lines=True,
         ignore_space_at_eol=True) if len(commit.parents) > 0 else None
     diff = None
     if diff_str:
         diff = PatchSet(diff_str)
     return Response.render(template, {
         'repo': repo,
         'entity': entity,
         'commit': commit,
         'diff': diff
     })
Esempio n. 20
0
    def _iterate_commits(self, storage: Storage, start_from=0, stop_at=inf):
        """Yield commit objects for the repository, restricted to indexes
        within ``[start_from, stop_at]``.

        When CACHE_THE_DATA is set, cached commits are yielded first and
        counted via ``skip_n`` so the subsequent repo query does not yield
        duplicates; freshly built commits are saved back to *storage*.
        """
        query: GithubQuery = self.query

        skip_n = start_from
        if CACHE_THE_DATA:
            for i, commit in enumerate(self.load_commits(storage)):
                if i == 0:
                    print("SOME OF THE DATA WERE LOADED FROM CACHE!")
                if not self._in_bounds(i, start_from, stop_at):
                    continue
                yield commit
                skip_n += 1

        for i, commit in enumerate(query.repo_iterate_commits()):
            # Skip commits already served from the cache or out of bounds.
            if i < skip_n or not self._in_bounds(i, start_from, stop_at):
                continue

            cobj = self._create_commit(commit)

            # ~~~~~~~~~ save patches mapping ~~~~~~~~~~~~~~~~
            # `git show` doesn't show merges details so here we had to
            # give it a 2-commits diff to show (unless it is the first commit)
            show_str = f"{commit.parents[0]}..{commit}" if commit.parents else commit
            patch_by_files = PatchSet(
                query.repo.git.show(show_str, first_parent=True))
            pfiles: Dict[str,
                         PatchedFile] = {pf.path: pf
                                         for pf in patch_by_files}
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

            # ~~~~~~~~~ convert the stats ~~~~~~~~~~~~~~~~
            for fname in commit.stats.files.keys():
                fc = self._create_file_changeset(fname, pfiles, commit.hexsha)
                cobj.files.append(fc)
            cobj.files.sort()  # place "RENAME" before others
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

            if CACHE_THE_DATA:
                storage.save_obj(cobj)
            yield cobj
        ### END

        storage.dispose()
Esempio n. 21
0
    def fetch_changed_files(self):
        """Aggregate changed file paths from every diff URL into
        ``self.changed_files``.

        Push events ("commits" in the event and number == 0) are served as
        JSON; other events are served as unidiff documents.
        """
        if not getattr(self, "diff_urls", False):
            raise TypeError("The event does not have diff URLs")

        for diff_url in self.diff_urls:
            response = get_with_retries(
                diff_url,
                sleep=RETRY_SLEEP,
            )
            response.raise_for_status()
            if "commits" in self.event and self.number == 0:
                diff = response.json()

                if "files" in diff:
                    # NOTE(review): this branch overwrites the accumulated set
                    # on every URL while the unidiff branch updates it --
                    # confirm that is intended for multi-URL push events.
                    self.changed_files = {f["filename"] for f in diff["files"]}
            else:
                diff_object = PatchSet(response.text)
                self.changed_files.update({f.path for f in diff_object})
        print(f"Fetched info about {len(self.changed_files)} changed files")
Esempio n. 22
0
def test_mypy_lint_a_py(app, caplog):
    """mypy linting of a newly added a.py reports exactly one problem,
    located on line 5 of the file (the assignment of the None-returning call)."""
    diff = """diff --git a/a.py b/a.py
new file mode 100644
index 0000000..87604af
--- /dev/null
+++ b/a.py
@@ -0,0 +1,5 @@
+def p() -> None:
+    print('hello')
+
+
+a = p()
"""

    context = Context(
        'deepanalyzer/badwolf',
        None,
        'pullrequest',
        'message',
        {'commit': {'hash': '000000'}},
        {'commit': {'hash': '111111'}},
        pr_id=1
    )
    spec = Specification()
    spec.linters.append(ObjectDict(name='mypy', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'mypy'))
    patch = PatchSet(diff.split('\n'))
    # Stub out I/O-heavy steps so process() only runs the linter itself.
    with mock.patch.object(lint, 'load_changes') as load_changes,\
            mock.patch.object(lint, 'update_build_status') as build_status,\
            mock.patch.object(lint, '_report') as report:
        load_changes.return_value = patch
        build_status.return_value = None
        report.return_value = (1, 2)
        lint.problems.set_changes(patch)
        lint.process()

        assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.line == 5
    assert problem.filename == 'a.py'
Esempio n. 23
0
def extract_single_line_patches(zip_path):
    """Extract SingleLineFixExample items from a zip of buggy/fixed file pairs.

    Diffs each buggy/fixed pair found in the archive and keeps only the
    patches whose diff modifies exactly one line.
    """
    logger.info(
        f"Extracting examples from {zip_path}, this might take a while.")
    examples = []
    with zipfile.ZipFile(zip_path) as rawzip:
        # I think the first member is the base dir
        base_path = rawzip.infolist()[0].filename

        info_pairs = get_buggy_pairs(rawzip)

        logger.info(f"Found {len(info_pairs)} buggy java files.")
        logger.info("Filtering single line patches...")
        for buggyinfo, fixedinfo in tqdm(
                info_pairs,
                desc="Progress",
                disable=not logger.isEnabledFor(logging.INFO),
                unit="files",
        ):
            with rawzip.open(buggyinfo) as buggyfile, rawzip.open(
                    fixedinfo) as fixedfile:
                buggy_code = decode(buggyfile.read())
                fixed_code = decode(fixedfile.read())
            # Diff the pair in memory, then parse it with unidiff.
            diff = list(
                unified_diff(
                    buggy_code.splitlines(keepends=True),
                    fixed_code.splitlines(keepends=True),
                    fromfile=buggyinfo.filename,
                    tofile=fixedinfo.filename,
                ))
            patch = PatchSet(diff)
            for file_patch in patch:
                if is_single_line(file_patch):
                    fixed_line, lineno = get_fixed_line(file_patch)
                    # Example id is the archive path relative to the base dir.
                    id_ = str(
                        PurePosixPath(
                            buggyinfo.filename).relative_to(base_path))
                    examples.append(
                        SingleLineFixExample(id_, buggy_code, fixed_line,
                                             lineno))
    # Hopefully this frees some memory
    del rawzip
    return examples
Esempio n. 24
0
def run(project,bugid,patch_no,tmp_tracefile='tmp_b'):
        """Measure developer test-suite coverage of the methods touched by a patch.

        Instruments the project's own test cases and the patched source files,
        runs ``defects4j test`` and stores the trace in ``../test_coverage/``.
        Sources, tests and the working checkout are restored afterwards.
        """
        w_buggy=project+bugid+'b'
        test='randoop'

        # Per-(project, bug, patch) temp trace path to avoid collisions.
        tmp_tracefile+=project+bugid+patch_no+'get_test_coverage'
        tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
        # Back up the test directory, then instrument the test cases in place.
        testdir=os.path.join(w_buggy,get_path_to_test(w_buggy))
        os.system('cp -r '+testdir+' '+testdir+'_bak')


        os.system('make TestCaseInstr ARGS="'+testdir+' '+tmp_tracefile+' '+project+'"')

        print(w_buggy+'_'+patch_no)
        # The patch identifies which source files need instrumentation.
        patch = PatchSet.from_filename('../patches/'+patch_no)
        souce_file_list=[]
        for filei in range(len(patch)):
            source_file=patch[filei].source_file
            souce_file_list.append(source_file)
            line_no_list=[]
            # For each hunk, record the first modified source line number
            # (the line after the last leading context line).
            for hunki in range(len(patch[filei])):
                for i in range(len(patch[filei][hunki])):
                    if not patch[filei][hunki][i].is_context:
                        line_no_list.append(str(patch[filei][hunki][i-1].source_line_no+1))
                        break
            os.system('cp '+source_file+' '+source_file+'.bak')
            os.system('make MthdInstr ARGS="'+source_file+' '+tmp_tracefile+' '+','.join(line_no_list)+'"')

        # Rebuild, run the suite from a clean trace file, archive the trace.
        os.system('defects4j compile -w '+w_buggy)
        if(os.path.exists(tmp_tracefile)):
            os.system('rm '+tmp_tracefile)
        os.system('defects4j test -n -r -w '+w_buggy)
        os.system('mv '+tmp_tracefile+' ../test_coverage/'+w_buggy+'_'+patch_no+'.txt')
        # Restore sources and tests, then recreate a clean checkout.
        for source_file in souce_file_list:
            os.system('rm '+source_file)
            os.system('mv '+source_file+'.bak '+source_file)
        os.system('rm -rf '+testdir)
        os.system('mv '+testdir+'_bak '+testdir)

        os.system('rm -rf '+w_buggy)
        os.system('defects4j checkout -p '+project+' -v '+bugid+'b -w '+project+bugid+'b')
Esempio n. 25
0
    def __init__(self, filename, strict=False):
        """Build a commit-like object from a mail-formatted patch file.

        Parses the unidiff for the list of modified files and scans the mail
        header (everything before the '---' separator) for Date:/From: lines.

        :param filename: path to the patch file (git format-patch style).
        :param strict: forwarded to the parent constructor.
        """
        self.filename = filename
        diff = PatchSet.from_filename(filename)
        date = None
        author = None

        with open(self.filename, 'r') as f:
            lines = f.read().splitlines()
        # Everything after the '---' line is the diff; keep only the mail part.
        lines = list(takewhile(lambda line: line != '---', lines))
        for line in lines:
            if line.startswith(DATE_PREFIX):
                date = parse(line[len(DATE_PREFIX):])
            elif line.startswith(FROM_PREFIX):
                author = GitCommit.format_git_author(line[len(FROM_PREFIX):])
        # The mail header ends at the first blank line; the rest is the body.
        header = list(takewhile(lambda line: line != '', lines))
        body = lines[len(header) + 1:]

        modified_files = []
        for f in diff:
            # Strip "a/" and "b/" prefixes
            source = f.source_file[2:]
            target = f.target_file[2:]

            if f.is_added_file:
                t = 'A'
            elif f.is_removed_file:
                t = 'D'
            elif f.is_rename:
                # Consider that renamed files are two operations: the deletion
                # of the original name and the addition of the new one.
                modified_files.append((source, 'D'))
                t = 'A'
            else:
                t = 'M'
            modified_files.append((target, t))
        super().__init__(None,
                         date,
                         author,
                         body,
                         modified_files,
                         strict=strict,
                         commit_to_date_hook=lambda x: date)
Esempio n. 26
0
def test_bandit_lint_a_py(app, caplog):
    """bandit should flag the try/except/pass in a newly added a.py."""
    diff = """diff --git a/a.py b/a.py
new file mode 100644
index 0000000..719cd56
--- /dev/null
+++ b/a.py
@@ -0,0 +1,4 @@
+try:
+    a = 1
+except Exception:
+    pass
"""

    context = TestContext('deepanalyzer/badwolf',
                          None,
                          'pullrequest',
                          'message', {'commit': {
                              'hash': '000000'
                          }}, {'commit': {
                              'hash': '111111'
                          }},
                          pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='bandit'))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'bandit'))
    changes = PatchSet(diff.split('\n'))
    # Stub out VCS access and reporting so only the linter itself runs.
    with mock.patch.object(lint, 'load_changes') as load_changes:
        with mock.patch.object(lint, 'update_build_status') as build_status:
            with mock.patch.object(lint, '_report') as report:
                load_changes.return_value = changes
                build_status.return_value = None
                report.return_value = None
                lint.problems.set_changes(changes)
                lint.process()

                assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.py'
    assert problem.line == 3
    assert not problem.is_error
Esempio n. 27
0
    def test_git_renaming(self):
        """A git rename diff parses as one modified file and round-trips."""
        sample = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'samples/git_rename.diff')
        with codecs.open(sample, 'r', encoding='utf-8') as diff_file:
            res = PatchSet(diff_file)

        self.assertEqual(len(res), 1)

        patch = res[0]
        self.assertTrue(patch.is_rename)
        self.assertEqual(patch.added, 1)
        self.assertEqual(patch.removed, 1)
        self.assertEqual(len(res.modified_files), 1)
        self.assertEqual(len(res.added_files), 0)
        self.assertEqual(len(res.removed_files), 0)

        # Re-rendering the PatchSet must reproduce the original diff text.
        with codecs.open(sample, 'r', encoding='utf-8') as diff_file:
            self.assertEqual(diff_file.read(), str(res))
Esempio n. 28
0
def test_yamllint_a_yml(app, caplog):
    """yamllint should flag the duplicated key in a newly added a.yml."""
    diff = """diff --git a/a.yml b/a.yml
new file mode 100644
index 0000000..1eccee8
--- /dev/null
+++ b/a.yml
@@ -0,0 +1,3 @@
+---
+a: 1
+a: 2
"""

    context = TestContext('deepanalyzer/badwolf',
                          None,
                          'pullrequest',
                          'message', {'commit': {
                              'hash': '000000'
                          }}, {'commit': {
                              'hash': '111111'
                          }},
                          pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='yamllint', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH,
                                                     'yamllint'))
    changes = PatchSet(diff.split('\n'))
    # Stub out VCS access and reporting so only the linter itself runs.
    with mock.patch.object(lint, 'load_changes') as load_changes:
        with mock.patch.object(lint, 'update_build_status') as build_status:
            with mock.patch.object(lint, '_report') as report:
                load_changes.return_value = changes
                build_status.return_value = None
                report.return_value = None
                lint.problems.set_changes(changes)
                lint.process()

                assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.yml'
    assert problem.line == 3
Esempio n. 29
0
def test_eslint_lint_a_js(app, pr_context):
    """eslint ("quotes": single) should flag the double-quoted string in a.js."""
    diff = """diff --git a/.eslintrc b/.eslintrc
new file mode 100644
index 0000000..45e5d69
--- /dev/null
+++ b/.eslintrc
@@ -0,0 +1,5 @@
+{
+    "rules": {
+        "quotes": [2, "single"]
+    }
+}
diff --git a/a.js b/a.js
new file mode 100644
index 0000000..f119a7f
--- /dev/null
+++ b/a.js
@@ -0,0 +1 @@
+console.log("bar")
"""

    spec = Specification()
    spec.linters.append(ObjectDict(name='eslint', pattern=None))
    lint = LintProcessor(pr_context, spec,
                         os.path.join(FIXTURES_PATH, 'eslint'))
    changes = PatchSet(diff.split('\n'))
    # Stub out VCS access and reporting so only the linter itself runs.
    with mock.patch.object(lint, 'load_changes') as load_changes:
        with mock.patch.object(lint, 'update_build_status') as build_status:
            with mock.patch.object(lint, '_report') as report:
                load_changes.return_value = changes
                build_status.return_value = None
                report.return_value = (1, 2)
                lint.problems.set_changes(changes)
                lint.process()

                assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'a.js'
    assert problem.line == 1
Esempio n. 30
0
    def check_diff(self):
        """Fetch and validate the PR diff: it must add exactly one new file.

        Marks the PR invalid when it touches more or fewer than one file,
        and flags it for attention when it modifies a pre-existing file.

        :returns: dict with the parsed new-file lines and the PatchSet,
                  or None when a validation check fired.
        """
        diff_text = requests.get(self.pr_info['diff_url'], auth=API_AUTH).text
        diff = PatchSet(diff_text)

        # Count the per-file headers in the raw text; one header per file.
        fcount = diff_text.count("diff --git")

        if fcount > 1:
            self.add_invalid(
                'More than one file has been added/removed/modified.')
            return
        elif fcount < 1:
            self.add_invalid(
                'Less than one file has been added/removed/modified.')
            return
        elif diff[0].is_modified_file:
            # Message grammar fixed (was "This file has modifies ...").
            self.add_attention('This file modifies a pre-existing file.')
            return

        new_file = self.parse_diff(str(diff[0]).split('\n'))

        return {'lines': new_file, 'diff': diff}
Esempio n. 31
0
 def __init__(self):
     """Collect repository changes since COMMIT_AGE and build the update feed."""
     self.current_time = int(time.time())
     self.repo = git.Repo(".")
     self.prior_commit = self._starting_commit(COMMIT_AGE)
     self.current_commit = self.repo.commit("HEAD")
     # Ignore whitespace-only churn so the diff reflects real changes.
     self.diff_raw = self.repo.git.diff(self.prior_commit,
                                        self.current_commit,
                                        ignore_blank_lines=True,
                                        ignore_space_at_eol=True)
     self.patch = PatchSet(self.diff_raw)
     self.filenames = {}
     # Accumulators filled in by create_updates(); counts start at zero.
     self.updates = {
         "authors": [],
         "author_count": 0,
         "entities": [],
         "pandas": [],
         "panda_count": 0,
         "photos": [],
         "zoos": [],
         "zoo_count": 0,
     }
     self.create_updates()
Esempio n. 32
0
def is_constitutional(diff_str):
    """Return the constitution-protected files touched by *diff_str*.

    A file matches when it is removed or renamed, when the whole file is
    protected (its lock entry is null), or when any hunk overlaps a
    protected chunk.

    :param diff_str: unified diff text (or lines) accepted by PatchSet.
    :returns: list of matching file paths, each listed at most once.
    """
    with open(settings.SITE_ROOT + '/elections/constitution.json') as f:
        constitution = json.load(f)
    patch = PatchSet(diff_str)
    matched_files = []

    for file in patch:
        if file.path not in constitution:
            continue
        locks = constitution[file.path]
        renamed = (file.source_file.split('/')[-1] !=
                   file.target_file.split('/')[-1])
        # Fixed: the original appended the same path up to three times
        # (removed/renamed, whole-file lock, and hunk overlap were all
        # checked independently). One short-circuit check suffices.
        if (file.is_removed_file or renamed or locks is None
                or _check_hunks(file, locks)):
            matched_files.append(file.path)

    return matched_files
Esempio n. 33
0
def test_jsonlint_a_json_changes_in_range(app, caplog):
    """jsonlint should flag the missing comma introduced by the diff."""
    diff = """diff --git a/b.json b/b.json
index 6ebebfe..6be8d74 100644
--- a/b.json
+++ b/b.json
@@ -1,3 +1,4 @@
 {
     "a": 1
+    "b": 2
 }
"""

    context = Context(
        'deepanalyzer/badwolf',
        None,
        'pullrequest',
        'message',
        {'commit': {'hash': '000000'}},
        {'commit': {'hash': '111111'}},
        pr_id=1
    )
    spec = Specification()
    spec.linters.append(ObjectDict(name='jsonlint', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH, 'jsonlint'))
    changes = PatchSet(diff.split('\n'))
    # Stub out VCS access and reporting so only the linter itself runs.
    with mock.patch.object(lint, 'load_changes') as load_changes:
        with mock.patch.object(lint, 'update_build_status') as build_status:
            with mock.patch.object(lint, '_report') as report:
                load_changes.return_value = changes
                build_status.return_value = None
                report.return_value = (1, 2)
                lint.problems.set_changes(changes)
                lint.process()

                assert load_changes.called

    assert len(lint.problems) == 1
    problem = lint.problems[0]
    assert problem.filename == 'b.json'
    assert problem.line == 2
Esempio n. 34
0
def test_shellcheck_a_sh(app, caplog):
    """shellcheck should flag the bogus `$foo=42` assignment in a.sh."""
    diff = """diff --git a/a.sh b/a.sh
new file mode 100644
index 0000000..9fb9840
--- /dev/null
+++ b/a.sh
@@ -0,0 +2 @@
+#!/bin/sh
+$foo=42
"""

    context = Context('deepanalyzer/badwolf',
                      None,
                      'pullrequest',
                      'message', {'commit': {
                          'hash': '000000'
                      }}, {'commit': {
                          'hash': '111111'
                      }},
                      pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='shellcheck', pattern=None))
    lint = LintProcessor(context, spec,
                         os.path.join(FIXTURES_PATH, 'shellcheck'))
    changes = PatchSet(diff.split('\n'))
    # Stub out VCS access and reporting so only the linter itself runs.
    with mock.patch.object(lint, 'load_changes') as load_changes:
        with mock.patch.object(lint, 'update_build_status') as build_status:
            with mock.patch.object(lint, '_report') as report:
                load_changes.return_value = changes
                build_status.return_value = None
                report.return_value = None
                lint.problems.set_changes(changes)
                lint.process()

                assert load_changes.called

    assert len(lint.problems) > 0
    problem = lint.problems[0]
    assert problem.filename == 'a.sh'
    assert problem.line == 2
Esempio n. 35
0
def test_sasslint_lint_a_scss(app, caplog):
    """sass-lint should report three problems for the newly added a.scss."""
    diff = """diff --git a/a.scss b/a.scss
new file mode 100644
index 0000000..48b3ebe
--- /dev/null
+++ b/a.scss
@@ -0,0 +1,3 @@
+.test {
+    background-color: "#FFF"
+}
"""

    context = Context('deepanalyzer/badwolf',
                      None,
                      'pullrequest',
                      'message', {'commit': {
                          'hash': '000000'
                      }}, {'commit': {
                          'hash': '111111'
                      }},
                      pr_id=1)
    spec = Specification()
    spec.linters.append(ObjectDict(name='sasslint', pattern=None))
    lint = LintProcessor(context, spec, os.path.join(FIXTURES_PATH,
                                                     'sasslint'))
    changes = PatchSet(diff.split('\n'))
    # Stub out VCS access and reporting so only the linter itself runs.
    with mock.patch.object(lint, 'load_changes') as load_changes:
        with mock.patch.object(lint, 'update_build_status') as build_status:
            with mock.patch.object(lint, '_report') as report:
                load_changes.return_value = changes
                build_status.return_value = None
                report.return_value = None
                lint.problems.set_changes(changes)
                lint.process()

                assert load_changes.called

    assert len(lint.problems) == 3
    problem = lint.problems[0]
    assert problem.filename == 'a.scss'
Esempio n. 36
0
def predict_failing_tests(path, predictor, encoder, test_ids_to_test_names):
    """Predict which tests fail for the uncommitted changes in *path*.

    Diffs the working tree against HEAD (no context lines), turns every
    removed line into a "mutant" record, cross-joins the mutants with all
    known test ids, and asks *predictor* for a per-(mutant, test) verdict.
    A test is reported only when not all of its rows predict success.

    :param path: path to the git repository to diff.
    :param predictor: fitted model exposing a ``predict`` method.
    :param encoder: fitted transformer for ``encoded_column_names``.
    :param test_ids_to_test_names: frame/series indexed by test id.
    :returns: index of test ids predicted to fail.
    """
    repo = Repo(path)
    diff = repo.git.diff(repo.head, None, "--unified=0")
    patchset = PatchSet(diff)

    # Analyze all the different changes
    mutants = []
    for patched_file in patchset:
        for hunk in patched_file:
            line_difference = 0
            for line in hunk:
                # NOTE(review): added lines also advance this counter, so
                # the recorded source line can drift within a hunk —
                # confirm this matches the training-data convention.
                line_difference += 1
                if line.is_removed:
                    mutants.append({
                        "modified_file_path":
                        patched_file.
                        target_file[2:],  # the [2:] removes unwanted prefixes
                        "line_number_changed":
                        hunk.source_start + line_difference,
                    })

    # Pair every mutant with every known test id.
    test_ids = test_ids_to_test_names.index
    mutants_with_test_ids = []
    for mutant in mutants:
        for test_id in test_ids:
            mutant["test_id"] = test_id
            mutants_with_test_ids.append(mutant.copy())

    mutants_with_test_ids = DataFrame(mutants_with_test_ids)
    # (A dead bare-expression statement referencing the frame was removed.)
    mutants_with_test_ids[encoded_column_names] = encoder.transform(
        mutants_with_test_ids[encoded_column_names])
    mutants_with_test_ids["prediction"] = predictor.predict(
        mutants_with_test_ids)
    # A test "passes" only if all of its mutant rows predict True.
    prediction_per_test_id = mutants_with_test_ids.groupby(
        "test_id").all()["prediction"]

    # Report the complement: tests with at least one failing prediction.
    return prediction_per_test_id[~prediction_per_test_id].index
Esempio n. 37
0
def main(commit_sha, comments_url, github_token):
    """Post one GitHub review comment per hunk of the working-tree diff."""
    repository = git.Repo()

    uni_diff_text = repository.git.diff(unified=0)

    patch_set = PatchSet(StringIO(uni_diff_text))

    hunks = []
    for patched_file in patch_set:
        file_path = patched_file.path

        for hunk in patched_file:
            # line 0 is not accepted
            start = hunk.source_start or 1
            end = start + hunk.source_length - 1
            hunks.append({
                "start_line": start,
                "line": end,
                "body": comment_body(hunk),
                "path": file_path,
                "commit_id": commit_sha,
                "side": "RIGHT",
                "start_side": "RIGHT",
            })

    for hunk in hunks:
        headers = {
            "Authorization": f"token {github_token}",
            "Content-Type": "application/json",
            "Accept": "application/vnd.github.comfort-fade-preview+json",
        }
        if hunk["start_line"] >= hunk["line"]:
            # Single-line comment: drop the multi-line preview header and
            # collapse start_line into line.
            headers.pop("Accept")
            hunk["line"] = hunk.pop("start_line")
        print(f"Making request: {hunk}")
        resp = requests.post(comments_url, json=hunk, headers=headers)
        if resp.status_code >= 400:
            print(f"Error: {resp.content.decode()}")
Esempio n. 38
0
def update_copyright(data):
    """Prepend a dated copyright-years ChangeLog entry for every touched file.

    For each file in the diff *data*, locates its nearest ChangeLog and
    prepends an entry attributed to the configured git user. Each
    ChangeLog is rewritten at most once per call.

    :param data: unified diff text (or lines) accepted by PatchSet.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # Argument lists instead of shell=True: the commands are fixed, so no
    # shell parsing is needed and injection pitfalls are avoided.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog not in changelogs:
            changelogs.add(changelog)
            with open(changelog) as f:
                content = f.read()
            with open(changelog, 'w+') as f:
                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
                f.write('\tUpdate copyright years.\n\n')
                f.write(content)
Esempio n. 39
0
def main():
    """Extract before/after copies of files touched by a series of patches.

    argv: base dir, patches dir, output dir. For each patch (newest
    first) copies the patched ("after") files, reverse-applies the patch
    with git, then copies the originals ("before"), writing an index.txt
    that maps before -> after file names.
    """
    base = sys.argv[1]
    patches = sys.argv[2]
    output = sys.argv[3]
    counter = 1
    index = []
    files = os.listdir(patches)
    pc = 0
    for pfile in sorted(files, reverse=True):
        # Lightweight progress indicator on one terminal line.
        sys.stdout.write("\r%d" % pc)
        pc += 1
        pfile = os.path.join(patches, pfile)
        try:
            patch = PatchSet.from_filename(pfile)
        except Exception:  # narrowed from bare except: skip unparsable patches
            continue
        base_counter = counter
        for f in patch.modified_files:
            fn = "%05d_after_%s" % (counter, f.path.replace("/", "_"))
            srcf = os.path.join(base, f.path)
            if os.path.exists(srcf):
                index.append([fn])
                copyfile(srcf, os.path.join(output, fn))
                counter += 1
        # Reverse-apply so the working tree shows the pre-patch state.
        call(["git", "apply", "-R", "--reject", pfile], cwd=base)
        counter = base_counter
        for f in patch.modified_files:
            fn = "%05d_before_%s" % (counter, f.path.replace("/", "_"))
            srcf = os.path.join(base, f.path)
            if os.path.exists(srcf):
                # NOTE(review): assumes the same files existed during the
                # "after" pass; otherwise this pairing is misaligned.
                index[counter - 1].insert(0, fn)
                copyfile(srcf, os.path.join(output, fn))
                counter += 1
    with open(os.path.join(output, "index.txt"), "w") as fout:
        for p in index:
            fout.write("%s %s\n" % tuple(p))
    print()
Esempio n. 40
0
import os
from unidiff import PatchSet
import json
# For every SimGen patch file in the cwd, emit a metadata JSON under INFO/
# recording the patch id, tool, correctness label, project and bug id.
projects = ['Chart', 'Time', 'Lang', 'Closure', 'Math', 'Mockito']
for patch_file in os.listdir('.'):
    try:
        patch = PatchSet.from_filename(patch_file)
        target_file = patch[0].source_file
    except Exception:  # narrowed from bare except: skip non-patch entries
        continue
    # First path component encodes "<Project><bugid>b..." for the patch.
    s = target_file.split('/')[0]
    info = {}
    info['ID'] = patch_file
    s = s.split('_')[0]
    info['tool'] = 'SimGen'
    info['correctness'] = 'Incorrect'
    for p in projects:
        if p in s:
            info['project'] = p
            # Strip the project prefix and the trailing 'b' marker.
            info['bug_id'] = s[len(p):-1]

    # Context manager replaces the manual open/close pair.
    with open('INFO/%s.json' % patch_file, 'w') as f:
        json.dump(info, f)
def run(project,bugid,patch_no,tests,tmp_tracefile='tmp_c'):

    tmp_tracefile+=project+bugid+patch_no+'run_print_trace'
    tmp_tracefile=os.path.join(os.getcwd(),tmp_tracefile)
    w_buggy=project+str(bugid)+'b'
    w_patched=w_buggy+'_'+patch_no

    patchfile=os.path.join('../patches',patch_no)
    patch = PatchSet.from_filename(patchfile)
    
    source_file=patch[0].source_file
    target_file=patch[0].target_file
    
    os.system('cp '+source_file+' '+source_file+'.bak')
    os.system('cp '+target_file+' '+target_file+'.bak')

    os.system('make instru_class ARGS="-S '+source_file+' -T '+tmp_tracefile+'"')
    os.system('make instru_class ARGS="-S '+target_file+
              ' -T '+tmp_tracefile+' '+
              ' -P '+patchfile+
              ' -F '+target_file+'"')
    #
    dir_path='../traces/'+w_patched
    if(os.path.exists(tmp_tracefile)):
        os.system('rm '+tmp_tracefile)
    os.system('mkdir '+dir_path)
    os.system('mkdir '+os.path.join(dir_path,'buggy'))
    os.system('mkdir '+os.path.join(dir_path,'patched'))
    if project=='Time':
        os.system('defects4j compile -w '+w_buggy)
        os.system('defects4j compile -w '+w_patched)    
    # clone
    for test in tests:
        test=test.strip()
        testfile=os.path.join(w_buggy,get_path_to_test(w_buggy),test.split('::')[0].replace('.','/')+'.java')
        if project=='Time':
            os.system('rm '+tmp_tracefile)
            status=os.system('timeout 90 defects4j test -t '+test+' -w  '+w_buggy)
        else:
            os.system('cp '+testfile+' '+testfile+'.bak')
            os.system('make GetSingleTest_Chart ARGS="'+testfile+' '+test.split('::')[1]+'"')
            status=os.system('timeout 90 defects4j test -t '+test+' -w '+w_buggy)
            os.system('mv '+testfile+'.bak '+testfile)
        print(testfile)
        if status==0:
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'buggy','__'.join(test.split('::'))))
    
        testfile=os.path.join(w_patched,get_path_to_test(w_patched),test.split('::')[0].replace('.','/')+'.java')
        if project=='Time':
            os.system('rm '+tmp_tracefile)
            status=os.system('timeout 90 defects4j test -t '+test+' -w  '+w_patched)
        else:
            os.system('cp '+testfile+' '+testfile+'.bak')
            os.system('make GetSingleTest_Chart ARGS="'+testfile+' '+test.split('::')[1]+'"')
            status=os.system('timeout 90 defects4j test -t '+test+' -w '+w_patched)
            os.system('mv '+testfile+'.bak '+testfile)        
        if status==0:
            os.system('mv '+tmp_tracefile+' '+os.path.join(dir_path,'patched','__'.join(test.split('::'))))
    # clone
    os.system('mv '+source_file+'.bak '+source_file)
    os.system('mv '+target_file+'.bak '+target_file)