def test_phabricator(mock_config, mock_revision):
    '''
    Test a phabricator revision
    '''
    assert not hasattr(mock_revision, 'mercurial')
    assert mock_revision.diff_id == 42
    assert mock_revision.diff_phid == 'PHID-DIFF-test'
    assert mock_revision.url == 'https://phabricator.test/D51'
    assert repr(mock_revision) == 'PHID-DIFF-test'
    assert mock_revision.id == 51  # revision

    # Patch is automatically loaded from Phabricator
    assert mock_revision.patch is not None
    assert isinstance(mock_revision.patch, str)
    assert len(mock_revision.patch.split('\n')) == 14
    patch = Patch.parse_patch(mock_revision.patch)
    assert patch == {
        'test.txt': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        },
        'test.cpp': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        }
    }
def test_phabricator(mock_config, mock_revision):
    """
    Test a phabricator revision
    """
    assert not hasattr(mock_revision, "mercurial")
    assert mock_revision.diff_id == 42
    assert mock_revision.diff_phid == "PHID-DIFF-test"
    assert mock_revision.url == "https://phabricator.test/D51"
    assert repr(mock_revision) == "PHID-DIFF-test"
    assert mock_revision.id == 51  # revision

    # Patch is automatically loaded from Phabricator
    assert mock_revision.patch is not None
    assert isinstance(mock_revision.patch, str)
    assert len(mock_revision.patch.split("\n")) == 14
    patch = Patch.parse_patch(mock_revision.patch)
    assert patch == {
        "test.txt": {
            "touched": [],
            "deleted": [],
            "added": [2],
            "new": False
        },
        "test.cpp": {
            "touched": [],
            "deleted": [],
            "added": [2],
            "new": False
        },
    }
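# --- Added example (not from the original tests) ---
# A minimal sketch of the mapping Patch.parse_patch returns, using a
# hypothetical one-hunk diff. The import path is assumed from the
# parsepatch project; per-file entries list touched/deleted/added line
# numbers plus a 'new' flag, matching the assertions in the tests above.
from parsepatch.patch import Patch

DIFF = """diff --git a/test.txt b/test.txt
--- a/test.txt
+++ b/test.txt
@@ -1,1 +1,2 @@
 Hello World
+Second line
"""

# 'added' line numbers are counted in the post-patch version of the file
assert Patch.parse_patch(DIFF) == {
    "test.txt": {
        "touched": [],
        "deleted": [],
        "added": [2],
        "new": False,
    }
}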
def analyze_patch(self):
    '''
    Analyze loaded patch to extract modified lines and statistics
    '''
    assert self.patch is not None, \
        'Missing patch'
    assert isinstance(self.patch, str), \
        'Invalid patch type'

    # List all modified lines from current revision changes
    patch = Patch.parse_patch(self.patch, skip_comments=False)
    assert patch != {}, \
        'Empty patch'
    self.lines = {
        # Use all changes in new files
        filename: diff.get('touched', []) + diff.get('added', [])
        for filename, diff in patch.items()
    }

    # Shortcut to files modified
    self.files = self.lines.keys()

    # Report nb of files and lines analyzed
    stats.api.increment('analysis.files', len(self.files))
    stats.api.increment('analysis.lines', sum(len(line) for line in self.lines.values()))
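# --- Added example (hypothetical data) ---
# A worked instance of the comprehension in analyze_patch: touched and
# added lines are merged per file, while deleted lines are dropped.
parsed = {
    'a.cpp': {'touched': [10, 11], 'deleted': [5], 'added': [42], 'new': False},
}
lines = {
    filename: diff.get('touched', []) + diff.get('added', [])
    for filename, diff in parsed.items()
}
assert lines == {'a.cpp': [10, 11, 42]}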
def test_phabricator(mock_phabricator, mock_config):
    '''
    Test a phabricator revision
    '''
    from static_analysis_bot.revisions import PhabricatorRevision

    with mock_phabricator as api:
        r = PhabricatorRevision(api, 'PHID-DIFF-testABcd12')
    assert not hasattr(r, 'mercurial')
    assert r.diff_id == 42
    assert r.diff_phid == 'PHID-DIFF-testABcd12'
    assert r.url == 'https://phabricator.test/D51'
    assert repr(r) == 'PHID-DIFF-testABcd12'
    assert r.id == 51  # revision

    # Patch is automatically loaded from Phabricator
    assert r.patch is not None
    assert isinstance(r.patch, str)
    assert len(r.patch.split('\n')) == 14
    patch = Patch.parse_patch(r.patch)
    assert patch == {
        'test.txt': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        },
        'test.cpp': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        }
    }
def load_hgmo_patch(diff):
    # Load the parent info as we have the try-task-config commit
    url = f"{diff.repository.url}/json-rev/{diff.mercurial_hash}"
    logging.info(f"Downloading {url}")
    resp = requests.get(url)
    resp.raise_for_status()

    meta = resp.json()
    if meta["desc"].startswith("try_task_config"):
        patch_rev = meta["parents"][0]
    else:
        patch_rev = diff.mercurial_hash

    # Load the parent patch
    url = f"{diff.repository.url}/raw-rev/{patch_rev}"
    logging.info(f"Downloading {url}")
    resp = requests.get(url)
    resp.raise_for_status()

    patch = Patch.parse_patch(resp.text, skip_comments=False)
    assert patch != {}, "Empty patch"

    lines = {
        # Use all changes in new files
        filename: diff.get("touched", []) + diff.get("added", [])
        for filename, diff in patch.items()
    }
    return lines
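# --- Added example (hypothetical repository and hash) ---
# A sketch of the two hgweb endpoints load_hgmo_patch relies on:
# json-rev returns changeset metadata (including "desc" and "parents"),
# raw-rev returns the raw patch text.
import requests

REPO = "https://hg.mozilla.org/try"  # hypothetical repository URL
REV = "abcdef123456"                 # hypothetical changeset hash

meta = requests.get(f"{REPO}/json-rev/{REV}").json()
parents = meta["parents"]  # list of parent changeset hashes
desc = meta["desc"]        # commit description

patch_text = requests.get(f"{REPO}/raw-rev/{REV}").text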
def _transform(commit):
    desc = commit.desc.decode('utf-8')

    obj = {
        'author': commit.author.decode('utf-8'),
        'desc': desc,
        'date': str(commit.date),
        'bug_id': int(commit.bug.decode('utf-8')) if commit.bug else None,
        'ever_backedout': commit.backedoutby != b'',
        'added': 0,
        'deleted': 0,
        'files_modified_num': 0,
        'types': set(),
        'components': list(),
        'author_experience': author_experience[commit],
        'author_experience_90_days': author_experience_90_days[commit],
        'author_email': commit.author_email.decode('utf-8'),
    }

    patch = HG.export(revs=[commit.node], git=True)
    patch_data = Patch.parse_patch(patch.decode('utf-8', 'ignore'),
                                   skip_comments=False,
                                   add_lines_for_new=True)
    for path, stats in patch_data.items():
        if 'added' not in stats:
            # Must be a binary file
            obj['types'].add('binary')
            continue

        obj['added'] += len(stats['added']) + len(stats['touched'])
        obj['deleted'] += len(stats['deleted']) + len(stats['touched'])
        ext = os.path.splitext(path)[1]
        if ext in ['.js', '.jsm']:
            type_ = 'JavaScript'
        elif ext in ['.c', '.cpp', '.cc', '.cxx', '.m', '.mm',
                     '.h', '.hh', '.hpp', '.hxx']:
            type_ = 'C/C++'
        elif ext == '.java':
            type_ = 'Java'
        elif ext == '.py':
            type_ = 'Python'
        elif ext == '.rs':
            type_ = 'Rust'
        else:
            type_ = ext

        obj['types'].add(type_)

    obj['files_modified_num'] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj['types'] = list(obj['types'])

    obj['components'] = list(
        set('::'.join(COMPONENTS[fl])
            for fl in patch_data.keys()
            if COMPONENTS.get(fl)))

    return obj
def _transform(commit):
    desc = commit[5].decode('utf-8')

    bug_id = None
    bug_id_match = re.search(BUG_PATTERN, desc)
    if bug_id_match:
        bug_id = int(bug_id_match.group(1))

    obj = {
        # 'rev': commit[0].decode('utf-8'),
        # 'node': commit[1].decode('utf-8'),
        # 'tags': commit[2].decode('utf-8'),
        # 'branch': commit[3].decode('utf-8'),
        'author': commit[4].decode('utf-8'),
        'desc': desc,
        # 'date': str(commit[6]),
        'bug_id': bug_id,
        'added': 0,
        'deleted': 0,
        'files_modified_num': 0,
        'types': set(),
    }

    patch = HG.export(revs=[commit[1]], git=True)
    patch_data = Patch.parse_patch(patch.decode('utf-8', 'ignore'),
                                   skip_comments=False,
                                   add_lines_for_new=True)
    for path, stats in patch_data.items():
        if 'added' not in stats:
            # Must be a binary file
            obj['types'].add('binary')
            continue

        obj['added'] += len(stats['added']) + len(stats['touched'])
        obj['deleted'] += len(stats['deleted']) + len(stats['touched'])
        ext = os.path.splitext(path)[1]
        if ext in ['.js', '.jsm']:
            type_ = 'JavaScript'
        elif ext in ['.c', '.cpp', '.h']:
            type_ = 'C/C++'
        elif ext in ['.java']:
            type_ = 'Java'
        elif ext in ['.py']:
            type_ = 'Python'
        else:
            type_ = ext

        obj['types'].add(type_)

    obj['files_modified_num'] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj['types'] = list(obj['types'])

    return obj
def test_phabricator(mock_phabricator, mock_repository, mock_config):
    '''
    Test a phabricator revision
    '''
    from static_analysis_bot.revisions import PhabricatorRevision

    with mock_phabricator as api:
        r = PhabricatorRevision(api, 'PHID-DIFF-testABcd12')
    assert not hasattr(r, 'mercurial')
    assert r.diff_id == 42
    assert r.diff_phid == 'PHID-DIFF-testABcd12'
    assert r.url == 'https://phabricator.test/D51'
    assert repr(r) == 'PHID-DIFF-testABcd12'
    assert r.id == 51  # revision

    # Check test.txt content
    test_txt = os.path.join(mock_config.repo_dir, 'test.txt')
    assert open(test_txt).read() == 'Hello World\n'

    # Cleanup the repo
    mock_repository.update(clean=True)

    # Load full patch
    # Mock the mercurial repo update as we use a dummy revision
    assert r.patch is None
    __update = mock_repository.update
    mock_repository.update = MagicMock(return_value=True)
    r.load(mock_repository)
    mock_repository.update = __update
    assert r.patch is not None
    assert isinstance(r.patch, str)
    assert len(r.patch.split('\n')) == 14
    patch = Patch.parse_patch(r.patch)
    assert patch == {
        'test.txt': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        },
        'test.cpp': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        }
    }

    # Check file is untouched after load
    assert open(test_txt).read() == 'Hello World\n'

    # Check file is updated after apply
    r.apply(mock_repository)
    assert open(test_txt).read() == 'Hello World\nSecond line\n'
def test(self):
    revs = [
        'c4c0ad8b3eaa', 'f045ac9f76cf', 'c58e9e70f971', 'd7a700707ddb',
        '81d3e4a2f3f3', '7e60ad275b73', 'f9b391e62608', '7dabae5e261a',
        'c6f9187b0b2e', 'd4f80c4ba719', 'b184c87f7606'
    ]
    for rev in revs:
        path = 'tests/patches/{}.patch'.format(rev)
        patch = self.readfile(path)
        r1 = Patch.parse_patch(patch, skip_comments=False)
        r2 = self.get_touched(patch)
        self.compare(r1, r2)
def parse(chgset, channel="nightly", chunk_size=1000000):
    url = RawRevision.get_url(channel)
    logger.info("Get patch for revision {}".format(chgset))
    try:
        res = Patch.parse_changeset(
            url, chgset, file_filter=utils.is_interesting_file, skip_comments=True
        )
        return res
    except Exception as e:
        msg = "Error in parsing patch with revision {}"
        logger.error(msg.format(chgset))
        raise e
def test_new(self):
    def filt(f):
        return f.endswith('jsm') or f.endswith('js') or f.endswith('ini')

    path = 'tests/patches/b184c87f7606.patch'
    patch = self.readfile(path)
    r1 = Patch.parse_patch(patch, skip_comments=False, file_filter=filt)
    for name, info in r1.items():
        if info['new']:
            self.assertEqual(
                name,
                'browser/tools/mozscreenshots/browser_screenshots_cropping.js'
            )

    r2 = Patch.parse_patch(patch,
                           skip_comments=False,
                           add_lines_for_new=True,
                           file_filter=filt)
    for name, info in r2.items():
        if info['new']:
            self.assertEqual(
                name,
                'browser/tools/mozscreenshots/browser_screenshots_cropping.js'
            )
            self.assertEqual(info['added'], list(range(1, 83)))
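# --- Added example (hypothetical diff) ---
# A sketch of what add_lines_for_new does for a brand new file, assuming
# (as the range(1, 83) assertion above suggests) that the flag fills
# 'added' with every line number of the created file.
NEW_FILE_DIFF = """diff --git a/hello.py b/hello.py
new file mode 100644
--- /dev/null
+++ b/hello.py
@@ -0,0 +1,2 @@
+print('hello')
+print('world')
"""

r = Patch.parse_patch(NEW_FILE_DIFF, add_lines_for_new=True)
assert r['hello.py']['new'] is True
assert r['hello.py']['added'] == [1, 2]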
def test_phabricator(mock_phabricator, mock_repository):
    '''
    Test a phabricator revision
    '''
    from shipit_static_analysis.revisions import PhabricatorRevision
    from shipit_static_analysis.report.phabricator import PhabricatorReporter

    api = PhabricatorReporter({
        'url': 'http://phabricator.test/api/',
        'api_key': 'deadbeef',
    })
    r = PhabricatorRevision('51:PHID-DIFF-testABcd12', api)
    assert not hasattr(r, 'mercurial')
    assert r.diff_id == 42
    assert r.diff_phid == 'PHID-DIFF-testABcd12'
    assert r.url == 'https://phabricator.test/PHID-DIFF-testABcd12/'
    assert r.build_diff_name() == 'PHID-DIFF-testABcd12-clang-format.diff'
    assert r.id == 51  # revision

    # Check test.txt content
    test_txt = os.path.join(mock_repository.directory, 'test.txt')
    assert open(test_txt).read() == 'Hello World\n'

    # Load full patch
    assert r.patch is None
    r.apply(mock_repository)
    assert r.patch is not None
    assert isinstance(r.patch, str)
    assert len(r.patch.split('\n')) == 7
    patch = Patch.parse_patch(r.patch)
    assert patch == {
        'test.txt': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        }
    }

    # Check file is updated
    assert open(test_txt).read() == 'Hello World\nSecond line\n'
def analyze_files(self, repo):
    '''
    Analyze modified files/lines
    '''
    assert isinstance(repo, hglib.client.hgclient)

    # Get the parents revisions
    parent_rev = 'parents({})'.format(self.mercurial)
    parents = repo.identify(id=True, rev=parent_rev).decode('utf-8').strip()

    # Find modified files by this revision
    self.files = []
    for parent in parents.split('\n'):
        changeset = '{}:{}'.format(parent, self.mercurial)
        status = repo.status(change=[changeset])
        self.files += [f.decode('utf-8') for _, f in status]
    logger.info('Modified files', files=self.files)

    # List all modified lines from current revision changes
    patch = Patch.parse_patch(
        repo.diff(change=self.mercurial, git=True).decode('utf-8'),
        skip_comments=False,
    )
    self.lines = {
        # Use all changes in new files
        filename: diff.get('touched', []) + diff.get('added', [])
        for filename, diff in patch.items()
    }

    # Report nb of files and lines analyzed
    stats.api.increment('analysis.files', len(self.files))
    stats.api.increment('analysis.lines', sum(len(line) for line in self.lines.values()))
def analyze_patch(self):
    """
    Analyze loaded patch to extract modified lines and statistics
    """
    assert self.patch is not None, "Missing patch"
    assert isinstance(self.patch, str), "Invalid patch type"

    # List all modified lines from current revision changes
    patch = Patch.parse_patch(self.patch, skip_comments=False)
    assert patch != {}, "Empty patch"
    self.lines = {
        # Use all changes in new files
        filename: diff.get("touched", []) + diff.get("added", [])
        for filename, diff in patch.items()
    }

    # Shortcut to files modified
    self.files = self.lines.keys()

    # Report nb of files and lines analyzed
    stats.add_metric("analysis.files", len(self.files))
    stats.add_metric("analysis.lines", sum(len(line) for line in self.lines.values()))
def test_local(self):
    self.assertEqual(Patch.parse_file('tests/patches/8be2131ed183.patch',
                                      get_hunks=True),
                     self.knownCorrect)
def run(self, revision):
    '''
    Run ./mach clang-format on all of the C/C++ files from the patch
    '''
    assert isinstance(revision, Revision)

    cmd = ['gecko-env', './mach', '--log-no-times', 'clang-format', '-p']

    # Returns the list of files eligible for formatting
    def get_eligible_files():
        files = []
        # Append each C/C++ file to format
        for file in revision.files:
            # Check the file is clang-format compliant, i.e. a C/C++ file
            _, ext = os.path.splitext(file)
            if ext.lower() in settings.cpp_extensions:
                files.append(file)
        return files

    files_to_format = get_eligible_files()
    if not files_to_format:
        logger.info('No eligible files found to format.')
        return []

    # Append to the cmd the files that will be formatted
    cmd += files_to_format

    # Run `./mach clang-format ...` so it reformats the current changes
    logger.info('Running ./mach clang-format', cmd=' '.join(cmd))
    clang_output = subprocess.check_output(
        cmd, cwd=settings.repo_dir).decode('utf-8')

    # Dump raw clang-format output as a Taskcluster artifact (for debugging)
    clang_output_path = os.path.join(
        settings.taskcluster.results_dir,
        '{}-clang-format.txt'.format(repr(revision)),
    )
    with open(clang_output_path, 'w') as f:
        f.write(clang_output)

    # Look for any fixes `./mach clang-format` may have found
    # on allowed files
    allowed_paths = [
        os.path.join(settings.repo_dir, path).encode('utf-8')  # needed for hglib
        for path in filter(settings.is_allowed_path, revision.files)
    ]
    client = hglib.open(settings.repo_dir)
    self.diff = client.diff(files=allowed_paths, unified=8).decode('utf-8')
    if not self.diff:
        return []

    # Store that diff as an improvement patch sent to devs
    revision.add_improvement_patch('clang-format', self.diff)

    # Generate a reverse diff for `parsepatch` (in order to get original
    # line numbers from the dev's patch instead of new line numbers)
    reverse_diff = client.diff(unified=8, reverse=True).decode('utf-8')

    # List all the lines that were fixed by `./mach clang-format`
    patch = Patch.parse_patch(reverse_diff, skip_comments=False)
    assert patch != {}, \
        'Empty patch'

    # Build `ClangFormatIssue`s
    issues = []
    for filename, diff in patch.items():
        lines = sorted(diff.get('touched', []) + diff.get('added', []))

        # Group consecutive lines together (algorithm by calixte)
        groups = []
        group = [lines[0]]
        for line in lines[1:]:
            # If the line is not consecutive with the group, start a new
            # group
            if line != group[-1] + 1:
                groups.append(group)
                group = []
            group.append(line)
        # Don't forget to add the last group
        groups.append(group)

        issues += [
            ClangFormatIssue(filename, group[0], len(group), revision)
            for group in groups
        ]

    stats.report_issues('clang-format', issues)
    return issues
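# --- Added example ---
# The consecutive-line grouping used in run() above, extracted as a
# standalone sketch so it can be checked with hypothetical input.
def group_consecutive(lines):
    groups = []
    group = [lines[0]]
    for line in lines[1:]:
        # Start a new group whenever the sequence breaks
        if line != group[-1] + 1:
            groups.append(group)
            group = []
        group.append(line)
    groups.append(group)
    return groups

assert group_consecutive([1, 2, 3, 7, 8, 10]) == [[1, 2, 3], [7, 8], [10]]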
def run(self, revision):
    '''
    Run ./mach clang-format on all of the C/C++ files from the patch
    '''
    assert isinstance(revision, Revision)

    cmd = [
        'gecko-env',
        './mach', '--log-no-times', 'clang-format', '-p'
    ]

    # Returns the list of files eligible for formatting
    def get_eligible_files():
        files = []
        # Append each C/C++ file to format
        for file in revision.files:
            # Check the file is clang-format compliant, i.e. a C/C++ file
            _, ext = os.path.splitext(file)
            if ext.lower() in frozenset.union(settings.cpp_extensions,
                                              settings.cpp_header_extensions):
                files.append(file)
        return files

    files_to_format = get_eligible_files()
    if not files_to_format:
        logger.info('No eligible files found to format.')
        return []

    # Append to the cmd the files that will be formatted
    cmd += files_to_format

    # Run `./mach clang-format ...` so it reformats the current changes
    logger.info('Running ./mach clang-format', cmd=' '.join(cmd))
    clang_output = subprocess.check_output(
        cmd, cwd=settings.repo_dir).decode('utf-8')

    # Dump raw clang-format output as a Taskcluster artifact (for debugging)
    clang_output_path = os.path.join(
        settings.taskcluster.results_dir,
        '{}-clang-format.txt'.format(repr(revision)),
    )
    with open(clang_output_path, 'w') as f:
        f.write(clang_output)

    # Look for any fixes `./mach clang-format` may have found
    # on allowed files
    allowed_paths = [
        os.path.join(settings.repo_dir, path).encode('utf-8')  # needed for hglib
        for path in filter(settings.is_allowed_path, revision.files)
    ]
    client = hglib.open(settings.repo_dir)
    self.diff = client.diff(files=allowed_paths, unified=8).decode('utf-8')
    if not self.diff:
        return []

    # Store that diff as an improvement patch sent to devs
    revision.add_improvement_patch('clang-format', self.diff)

    # Generate a reverse diff for `parsepatch` (in order to get original
    # line numbers from the dev's patch instead of new line numbers)
    reverse_diff = client.diff(unified=8, reverse=True).decode('utf-8')

    # List all the lines that were fixed by `./mach clang-format`
    patch = Patch.parse_patch(reverse_diff, skip_comments=False)
    assert patch != {}, \
        'Empty patch'

    # Build `ClangFormatIssue`s
    issues = []
    for filename, diff in patch.items():
        lines = sorted(diff.get('touched', []) + diff.get('added', []))

        # Group consecutive lines together (algorithm by calixte)
        groups = []
        group = [lines[0]]
        for line in lines[1:]:
            # If the line is not consecutive with the group, start a new
            # group
            if line != group[-1] + 1:
                groups.append(group)
                group = []
            group.append(line)
        # Don't forget to add the last group
        groups.append(group)

        issues += [
            ClangFormatIssue(filename, g[0], len(g), revision)
            for g in groups
        ]

    stats.report_issues('clang-format', issues)
    return issues
def run(self, revision):
    '''
    Run the static analysis workflow:
     * Pull revision from review
     * Checkout revision
     * Run static analysis
     * Publish results
    '''
    assert revision.mercurial is not None, \
        'Cannot run without a mercurial revision'

    # Add log to find Taskcluster task in papertrail
    logger.info(
        'New static analysis',
        taskcluster_task=self.taskcluster_task_id,
        taskcluster_run=self.taskcluster_run_id,
        channel=settings.app_channel,
        revision=revision,
    )

    # Setup tools (clang & mozlint)
    clang_tidy = CLANG_TIDY in self.analyzers and ClangTidy(
        self.repo_dir, settings.target)
    clang_format = CLANG_FORMAT in self.analyzers and ClangFormat(
        self.repo_dir)
    mozlint = MOZLINT in self.analyzers and MozLint(self.repo_dir)

    # Force cleanup to reset tip,
    # otherwise previous pulls are still there
    self.hg.update(rev=b'tip', clean=True)

    # Pull revision from review
    self.hg.pull(source=REPO_REVIEW, rev=revision.mercurial,
                 update=True, force=True)

    # Update to the target revision
    self.hg.update(rev=revision.mercurial, clean=True)

    # Get the parents revisions
    parent_rev = 'parents({})'.format(revision.mercurial)
    parents = self.hg.identify(id=True, rev=parent_rev).decode('utf-8').strip()

    # Find modified files by this revision
    modified_files = []
    for parent in parents.split('\n'):
        changeset = '{}:{}'.format(parent, revision.mercurial)
        status = self.hg.status(change=[changeset])
        modified_files += [f.decode('utf-8') for _, f in status]
    logger.info('Modified files', files=modified_files)

    # List all modified lines from current revision changes
    patch = Patch.parse_patch(
        self.hg.diff(change=revision.mercurial, git=True).decode('utf-8'),
        skip_comments=False,
    )
    modified_lines = {
        # Use all changes in new files
        filename: diff.get('touched', []) + diff.get('added', [])
        for filename, diff in patch.items()
    }

    # mach configure with mozconfig
    logger.info('Mach configure...')
    run_check(['gecko-env', './mach', 'configure'], cwd=self.repo_dir)

    # Build CompileDB backend
    logger.info('Mach build backend...')
    cmd = ['gecko-env', './mach', 'build-backend', '--backend=CompileDB']
    run_check(cmd, cwd=self.repo_dir)

    # Build exports
    logger.info('Mach build exports...')
    run_check(['gecko-env', './mach', 'build', 'pre-export'], cwd=self.repo_dir)
    run_check(['gecko-env', './mach', 'build', 'export'], cwd=self.repo_dir)

    # Run static analysis through clang-tidy
    issues = []
    if clang_tidy:
        logger.info('Run clang-tidy...')
        issues += clang_tidy.run(settings.clang_checkers, modified_lines)
    else:
        logger.info('Skip clang-tidy')

    # Run clang-format on modified files
    diff_url = None
    if clang_format:
        logger.info('Run clang-format...')
        format_issues, patched = clang_format.run(settings.cpp_extensions,
                                                  modified_lines)
        issues += format_issues
        if patched:
            # Get current diff on these files
            logger.info('Found clang-format issues', files=patched)
            files = list(map(
                lambda x: os.path.join(self.repo_dir, x).encode('utf-8'),
                patched))
            diff = self.hg.diff(files)
            assert diff is not None and diff != b'', \
                'Empty diff'

            # Write diff in results directory
            diff_path = os.path.join(self.taskcluster_results_dir,
                                     revision.build_diff_name())
            with open(diff_path, 'w') as f:
                length = f.write(diff.decode('utf-8'))
                logger.info('Diff from clang-format dumped', path=diff_path, length=length)  # noqa

            # Build diff download url
            diff_url = ARTIFACT_URL.format(
                task_id=self.taskcluster_task_id,
                run_id=self.taskcluster_run_id,
                diff_name=revision.build_diff_name(),
            )
            logger.info('Diff available online', url=diff_url)
        else:
            logger.info('No clang-format issues')
    else:
        logger.info('Skip clang-format')

    # Run linter
    if mozlint:
        logger.info('Run mozlint...')
        issues += mozlint.run(modified_lines)
    else:
        logger.info('Skip mozlint')

    logger.info('Detected {} issue(s)'.format(len(issues)))
    if not issues:
        logger.info('No issues, stopping there.')
        return

    # Publish reports about these issues
    for reporter in self.reporters.values():
        reporter.publish(issues, revision, diff_url)
def test_diff_r(self):
    path = 'tests/patches/janx.patch'
    patch = self.readfile(path)
    r1 = Patch.parse_patch(patch, skip_comments=False)
    r2 = self.get_touched(patch)
    self.compare(r1, r2)
def test_remote(self):
    self.assertEqual(Patch.parse_changeset('https://hg.mozilla.org/mozilla-central/raw-rev',
                                           '8be2131ed183',
                                           get_hunks=True),
                     self.knownCorrect)
def _transform(commit):
    desc = commit.desc.decode("utf-8")

    obj = {
        "node": commit.node.decode("utf-8"),
        "author": commit.author.decode("utf-8"),
        "reviewers": commit.reviewers,
        "desc": desc,
        "date": str(commit.date),
        "pushdate": str(commit.pushdate),
        "bug_id": int(commit.bug.decode("utf-8")) if commit.bug else None,
        "ever_backedout": commit.backedoutby != b"",
        "added": 0,
        "test_added": 0,
        "deleted": 0,
        "test_deleted": 0,
        "files_modified_num": 0,
        "types": set(),
        "components": list(),
        "author_experience": experiences_by_commit["total"]["author"][commit.node],
        f"author_experience_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["author"][commit.node],
        "reviewer_experience": experiences_by_commit["total"]["reviewer"][commit.node],
        f"reviewer_experience_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["reviewer"][commit.node],
        "author_email": commit.author_email.decode("utf-8"),
        "components_touched_prev": experiences_by_commit["total"]["component"][
            commit.node
        ],
        f"components_touched_prev_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["component"][commit.node],
        "files_touched_prev": experiences_by_commit["total"]["file"][commit.node],
        f"files_touched_prev_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["file"][commit.node],
        "directories_touched_prev": experiences_by_commit["total"]["directory"][
            commit.node
        ],
        f"directories_touched_prev_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["directory"][commit.node],
    }

    sizes = []

    patch = HG.export(revs=[commit.node], git=True)
    patch_data = Patch.parse_patch(
        patch.decode("utf-8", "ignore"), skip_comments=False, add_lines_for_new=True
    )
    for path, stats in patch_data.items():
        if "added" not in stats:
            # Must be a binary file
            obj["types"].add("binary")
            continue

        if is_test(path):
            obj["test_added"] += len(stats["added"]) + len(stats["touched"])
            obj["test_deleted"] += len(stats["deleted"]) + len(stats["touched"])
        else:
            obj["added"] += len(stats["added"]) + len(stats["touched"])
            obj["deleted"] += len(stats["deleted"]) + len(stats["touched"])

        ext = os.path.splitext(path)[1]
        if ext in [".js", ".jsm"]:
            type_ = "JavaScript"
        elif ext in [
            ".c", ".cpp", ".cc", ".cxx", ".m", ".mm",
            ".h", ".hh", ".hpp", ".hxx",
        ]:
            type_ = "C/C++"
        elif ext == ".java":
            type_ = "Java"
        elif ext == ".py":
            type_ = "Python"
        elif ext == ".rs":
            type_ = "Rust"
        else:
            type_ = ext

        obj["types"].add(type_)

        try:
            after = HG.cat([path.encode("utf-8")], rev=commit.node)
        except hglib.error.CommandError as e:
            if b"no such file in rev" in e.err:
                after = b""
            else:
                raise

        sizes.append(after.count(b"\n"))

    obj["total_file_size"] = sum(sizes)
    obj["average_file_size"] = (
        obj["total_file_size"] / len(sizes) if len(sizes) > 0 else 0
    )
    # default guards commits whose patch only touches binary files
    obj["maximum_file_size"] = max(sizes, default=0)
    obj["minimum_file_size"] = min(sizes, default=0)

    obj["files_modified_num"] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj["types"] = list(obj["types"])

    obj["components"] = list(
        set(
            path_to_component[path]
            for path in patch_data.keys()
            if path_to_component.get(path)
        )
    )

    return obj
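# --- Added example ---
# The extension-to-language bucketing repeated across these _transform
# variants, factored into a hypothetical helper equivalent to the
# if/elif chain above.
import os

C_CPP_EXTENSIONS = {".c", ".cpp", ".cc", ".cxx", ".m", ".mm",
                    ".h", ".hh", ".hpp", ".hxx"}

def file_type(path):
    ext = os.path.splitext(path)[1]
    if ext in (".js", ".jsm"):
        return "JavaScript"
    if ext in C_CPP_EXTENSIONS:
        return "C/C++"
    return {".java": "Java", ".py": "Python", ".rs": "Rust"}.get(ext, ext)

assert file_type("dom/base/nsDocument.cpp") == "C/C++"
assert file_type("README.md") == ".md"  # unknown extensions pass through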
def run(self, revision):
    '''
    Run ./mach clang-format on the current patch
    '''
    assert isinstance(revision, Revision)

    # Prepare the `./mach clang-format` command that will reformat
    # the current changes
    cmd = [
        'gecko-env',
        './mach', '--log-no-times', 'clang-format',
    ]
    logger.info('Running ./mach clang-format', cmd=' '.join(cmd))

    # Run command
    clang_output = subprocess.check_output(
        cmd, cwd=settings.repo_dir).decode('utf-8')

    # Dump raw clang-format output as a Taskcluster artifact (for debugging)
    clang_output_path = os.path.join(
        settings.taskcluster.results_dir,
        '{}-clang-format.txt'.format(repr(revision)),
    )
    with open(clang_output_path, 'w') as f:
        f.write(clang_output)

    # Look for any fixes `./mach clang-format` may have found
    client = hglib.open(settings.repo_dir)
    self.diff = client.diff(unified=8).decode('utf-8')
    if not self.diff:
        return []

    # Store that diff as an improvement patch sent to devs
    revision.add_improvement_patch('clang-format', self.diff)

    # Generate a reverse diff for `parsepatch` (in order to get original
    # line numbers from the dev's patch instead of new line numbers)
    reverse_diff = client.diff(unified=8, reverse=True).decode('utf-8')

    # List all the lines that were fixed by `./mach clang-format`
    patch = Patch.parse_patch(reverse_diff, skip_comments=False)
    assert patch != {}, \
        'Empty patch'

    # Build `ClangFormatIssue`s
    issues = []
    for filename, diff in patch.items():
        lines = sorted(diff.get('touched', []) + diff.get('added', []))

        # Group consecutive lines together (algorithm by calixte)
        groups = []
        group = [lines[0]]
        for line in lines[1:]:
            # If the line is not consecutive with the group, start a new
            # group
            if line != group[-1] + 1:
                groups.append(group)
                group = []
            group.append(line)
        # Don't forget to add the last group
        groups.append(group)

        issues += [
            ClangFormatIssue(filename, group[0], len(group), revision)
            for group in groups
        ]

    stats.report_issues('clang-format', issues)
    return issues
def _transform(commit):
    desc = commit.desc.decode("utf-8")

    obj = {
        "author": commit.author.decode("utf-8"),
        "desc": desc,
        "date": str(commit.date),
        "bug_id": int(commit.bug.decode("utf-8")) if commit.bug else None,
        "ever_backedout": commit.backedoutby != b"",
        "added": 0,
        "test_added": 0,
        "deleted": 0,
        "test_deleted": 0,
        "files_modified_num": 0,
        "types": set(),
        "components": list(),
        "author_experience": author_experience[commit.node],
        "author_experience_90_days": author_experience_90_days[commit.node],
        "author_email": commit.author_email.decode("utf-8"),
        "components_touched_prev": components_touched_prev[commit.node],
        "components_touched_prev_90_days": components_touched_prev_90_days[commit.node],
        "files_touched_prev": files_touched_prev[commit.node],
        "files_touched_prev_90_days": files_touched_prev_90_days[commit.node],
    }

    patch = HG.export(revs=[commit.node], git=True)
    patch_data = Patch.parse_patch(patch.decode("utf-8", "ignore"),
                                   skip_comments=False,
                                   add_lines_for_new=True)
    for path, stats in patch_data.items():
        if "added" not in stats:
            # Must be a binary file
            obj["types"].add("binary")
            continue

        if is_test(path):
            obj["test_added"] += len(stats["added"]) + len(stats["touched"])
            obj["test_deleted"] += len(stats["deleted"]) + len(stats["touched"])
        else:
            obj["added"] += len(stats["added"]) + len(stats["touched"])
            obj["deleted"] += len(stats["deleted"]) + len(stats["touched"])

        ext = os.path.splitext(path)[1]
        if ext in [".js", ".jsm"]:
            type_ = "JavaScript"
        elif ext in [
            ".c", ".cpp", ".cc", ".cxx", ".m", ".mm",
            ".h", ".hh", ".hpp", ".hxx",
        ]:
            type_ = "C/C++"
        elif ext == ".java":
            type_ = "Java"
        elif ext == ".py":
            type_ = "Python"
        elif ext == ".rs":
            type_ = "Rust"
        else:
            type_ = ext

        obj["types"].add(type_)

    obj["files_modified_num"] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj["types"] = list(obj["types"])

    obj["components"] = list(
        set(path_to_component[path]
            for path in patch_data.keys()
            if path_to_component.get(path)))

    return obj