def test_phabricator(mock_config, mock_revision):
    '''
    Check the identifiers exposed by a Phabricator revision and the
    content of the patch attached to it
    '''
    revision = mock_revision

    # Identifiers of the diff and of the revision it belongs to
    assert not hasattr(revision, 'mercurial')
    assert revision.diff_id == 42
    assert revision.diff_phid == 'PHID-DIFF-test'
    assert revision.url == 'https://phabricator.test/D51'
    assert repr(revision) == 'PHID-DIFF-test'
    assert revision.id == 51  # revision

    # The raw patch must already be available as text
    raw_patch = revision.patch
    assert raw_patch is not None
    assert isinstance(raw_patch, str)
    assert len(raw_patch.split('\n')) == 14

    # Both files are expected to only gain their line 2
    one_added_line = {'touched': [], 'deleted': [], 'added': [2], 'new': False}
    expected = {
        'test.txt': dict(one_added_line),
        'test.cpp': dict(one_added_line),
    }
    assert Patch.parse_patch(raw_patch) == expected
Beispiel #2
0
def test_phabricator(mock_config, mock_revision):
    """
    Validate the attributes of a Phabricator revision, then the
    structure of its parsed patch
    """
    # The revision must not expose a mercurial attribute
    assert not hasattr(mock_revision, "mercurial")

    # Plain attributes, checked in the same order as they are listed
    expected_attributes = (
        ("diff_id", 42),
        ("diff_phid", "PHID-DIFF-test"),
        ("url", "https://phabricator.test/D51"),
    )
    for attribute, value in expected_attributes:
        assert getattr(mock_revision, attribute) == value
    assert repr(mock_revision) == "PHID-DIFF-test"
    assert mock_revision.id == 51  # revision

    # Patch is automatically loaded from Phabricator
    content = mock_revision.patch
    assert content is not None
    assert isinstance(content, str)
    assert len(content.split("\n")) == 14

    # Each file has a single added line (line 2) and is not a new file
    expected_patch = {
        filename: {"touched": [], "deleted": [], "added": [2], "new": False}
        for filename in ("test.txt", "test.cpp")
    }
    parsed = Patch.parse_patch(content)
    assert parsed == expected_patch
Beispiel #3
0
    def analyze_patch(self):
        '''
        Parse the loaded patch, store for every file the lines it
        touches or adds, and report the number of files and lines
        '''
        raw_patch = self.patch
        assert raw_patch is not None, \
            'Missing patch'
        assert isinstance(raw_patch, str), \
            'Invalid patch type'

        # List all modified lines from current revision changes
        parsed = Patch.parse_patch(raw_patch, skip_comments=False)
        assert parsed != {}, \
            'Empty patch'

        # For each file: touched lines first, then added lines
        modified = {}
        for filename, changes in parsed.items():
            modified[filename] = changes.get('touched', []) + changes.get('added', [])
        self.lines = modified

        # Shortcut to files modified
        self.files = self.lines.keys()

        # Report nb of files and lines analyzed
        stats.api.increment('analysis.files', len(self.files))
        nb_lines = sum(map(len, self.lines.values()))
        stats.api.increment('analysis.lines', nb_lines)
Beispiel #4
0
def test_phabricator(mock_phabricator, mock_config):
    '''
    Build a Phabricator revision from the mocked API and check its
    identifiers and its parsed patch
    '''
    from static_analysis_bot.revisions import PhabricatorRevision

    diff_phid = 'PHID-DIFF-testABcd12'

    with mock_phabricator as api:
        revision = PhabricatorRevision(api, diff_phid)

    # Identifiers of the diff and of its revision
    assert not hasattr(revision, 'mercurial')
    assert revision.diff_id == 42
    assert revision.diff_phid == 'PHID-DIFF-testABcd12'
    assert revision.url == 'https://phabricator.test/D51'
    assert repr(revision) == 'PHID-DIFF-testABcd12'
    assert revision.id == 51  # revision

    # Patch is automatically loaded from Phabricator
    assert revision.patch is not None
    assert isinstance(revision.patch, str)
    nb_lines = len(revision.patch.split('\n'))
    assert nb_lines == 14

    # Only line 2 is added in each of the two existing files
    expected = {}
    for filename in ('test.txt', 'test.cpp'):
        expected[filename] = {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False,
        }
    assert Patch.parse_patch(revision.patch) == expected
Beispiel #5
0
def load_hgmo_patch(diff):
    """
    Download the patch of a diff from its repository and list the lines it modifies.

    The revision metadata is fetched first: when its description starts with
    "try_task_config", the patch is read from the first parent revision
    instead of the diff's own mercurial hash.

    Returns a dict mapping each filename of the patch to the list of its
    touched line numbers followed by its added line numbers.
    Propagates whatever `raise_for_status()` raises on a failed download,
    and raises AssertionError when the parsed patch is empty.
    """
    # Load the parent info as we have the try-task-config commit
    url = f"{diff.repository.url}/json-rev/{diff.mercurial_hash}"
    logging.info(f"Downloading {url}")
    resp = requests.get(url)
    resp.raise_for_status()
    meta = resp.json()
    if meta["desc"].startswith("try_task_config"):
        # Reuse the payload decoded above instead of parsing the body again
        patch_rev = meta["parents"][0]
    else:
        patch_rev = diff.mercurial_hash

    # Load the parent patch
    url = f"{diff.repository.url}/raw-rev/{patch_rev}"
    logging.info(f"Downloading {url}")
    resp = requests.get(url)
    resp.raise_for_status()

    patch = Patch.parse_patch(resp.text, skip_comments=False)
    assert patch != {}, "Empty patch"

    # Use all changes in new files; `changes` avoids shadowing the `diff` argument
    return {
        filename: changes.get("touched", []) + changes.get("added", [])
        for filename, changes in patch.items()
    }
Beispiel #6
0
def _transform(commit):
    '''
    Convert a commit into a JSON-serializable dict: author details,
    description, date, bug id, backout status, experience counters,
    line counts, file types and components of the files in its patch
    '''
    # Extensions grouped under a language name; any other extension is
    # reported as-is
    languages = {
        '.js': 'JavaScript',
        '.jsm': 'JavaScript',
        '.c': 'C/C++',
        '.cpp': 'C/C++',
        '.cc': 'C/C++',
        '.cxx': 'C/C++',
        '.m': 'C/C++',
        '.mm': 'C/C++',
        '.h': 'C/C++',
        '.hh': 'C/C++',
        '.hpp': 'C/C++',
        '.hxx': 'C/C++',
        '.java': 'Java',
        '.py': 'Python',
        '.rs': 'Rust',
    }

    desc = commit.desc.decode('utf-8')

    obj = {
        'author': commit.author.decode('utf-8'),
        'desc': desc,
        'date': str(commit.date),
        'bug_id': int(commit.bug.decode('utf-8')) if commit.bug else None,
        'ever_backedout': commit.backedoutby != b'',
        'added': 0,
        'deleted': 0,
        'files_modified_num': 0,
        'types': set(),
        'components': list(),
        'author_experience': author_experience[commit],
        'author_experience_90_days': author_experience_90_days[commit],
        'author_email': commit.author_email.decode('utf-8'),
    }

    exported = HG.export(revs=[commit.node], git=True)
    patch_data = Patch.parse_patch(exported.decode('utf-8', 'ignore'),
                                   skip_comments=False,
                                   add_lines_for_new=True)

    added = 0
    deleted = 0
    types = set()
    for path, changes in patch_data.items():
        if 'added' not in changes:
            # Must be a binary file
            types.add('binary')
            continue

        # Touched lines count both as an addition and as a deletion
        nb_added = len(changes['added'])
        nb_touched = len(changes['touched'])
        nb_deleted = len(changes['deleted'])
        added += nb_added + nb_touched
        deleted += nb_deleted + nb_touched

        ext = os.path.splitext(path)[1]
        types.add(languages.get(ext, ext))

    obj['added'] = added
    obj['deleted'] = deleted
    obj['files_modified_num'] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj['types'] = list(types)

    # Unique 'product::component' style names of the files with a known component
    components = {
        '::'.join(COMPONENTS[fl]) for fl in patch_data.keys()
        if COMPONENTS.get(fl)
    }
    obj['components'] = list(components)

    return obj
Beispiel #7
0
def _transform(commit):
    '''
    Convert a commit tuple into a JSON-serializable dict holding its
    author, description, bug id, line counts and file types
    '''
    # Extensions grouped under a language name; any other extension is
    # reported as-is
    languages = {
        '.js': 'JavaScript',
        '.jsm': 'JavaScript',
        '.c': 'C/C++',
        '.cpp': 'C/C++',
        '.h': 'C/C++',
        '.java': 'Java',
        '.py': 'Python',
    }

    desc = commit[5].decode('utf-8')

    # The bug id is the first group matched by BUG_PATTERN in the description
    bug_id_match = re.search(BUG_PATTERN, desc)
    bug_id = int(bug_id_match.group(1)) if bug_id_match else None

    obj = {
        # 'rev': commit[0].decode('utf-8'),
        # 'node': commit[1].decode('utf-8'),
        # 'tags': commit[2].decode('utf-8'),
        # 'branch': commit[3].decode('utf-8'),
        'author': commit[4].decode('utf-8'),
        'desc': desc,
        # 'date': str(commit[6]),
        'bug_id': bug_id,
        'added': 0,
        'deleted': 0,
        'files_modified_num': 0,
        'types': set(),
    }

    exported = HG.export(revs=[commit[1]], git=True)
    patch_data = Patch.parse_patch(exported.decode('utf-8', 'ignore'),
                                   skip_comments=False,
                                   add_lines_for_new=True)

    added = 0
    deleted = 0
    types = set()
    for path, changes in patch_data.items():
        if 'added' not in changes:
            # Must be a binary file
            types.add('binary')
            continue

        # Touched lines count both as an addition and as a deletion
        nb_added = len(changes['added'])
        nb_touched = len(changes['touched'])
        nb_deleted = len(changes['deleted'])
        added += nb_added + nb_touched
        deleted += nb_deleted + nb_touched

        ext = os.path.splitext(path)[1]
        types.add(languages.get(ext, ext))

    obj['added'] = added
    obj['deleted'] = deleted
    obj['files_modified_num'] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj['types'] = list(types)

    return obj
def test_phabricator(mock_phabricator, mock_repository, mock_config):
    '''
    Test a phabricator revision

    Checks the revision identifiers, then that loading the patch leaves
    test.txt unchanged in the repository while applying it appends the
    second line.
    '''
    from static_analysis_bot.revisions import PhabricatorRevision

    def read_file(path):
        # Close the handle right away instead of leaving it to the GC
        with open(path) as f:
            return f.read()

    with mock_phabricator as api:
        r = PhabricatorRevision(api, 'PHID-DIFF-testABcd12')
    assert not hasattr(r, 'mercurial')
    assert r.diff_id == 42
    assert r.diff_phid == 'PHID-DIFF-testABcd12'
    assert r.url == 'https://phabricator.test/D51'
    assert repr(r) == 'PHID-DIFF-testABcd12'
    assert r.id == 51  # revision

    # Check test.txt content
    test_txt = os.path.join(mock_config.repo_dir, 'test.txt')
    assert read_file(test_txt) == 'Hello World\n'

    # Cleanup the repo
    mock_repository.update(clean=True)

    # Load full patch
    # Mock the mercurial repo update as we use a dummy revision
    assert r.patch is None
    original_update = mock_repository.update
    mock_repository.update = MagicMock(return_value=True)
    try:
        r.load(mock_repository)
    finally:
        # Always restore the real method, even when load() fails
        mock_repository.update = original_update
    assert r.patch is not None
    assert isinstance(r.patch, str)
    assert len(r.patch.split('\n')) == 14
    patch = Patch.parse_patch(r.patch)
    assert patch == {
        'test.txt': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        },
        'test.cpp': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        }
    }

    # Check file is untouched after load
    assert read_file(test_txt) == 'Hello World\n'

    # Check file is updated after apply
    r.apply(mock_repository)
    assert read_file(test_txt) == 'Hello World\nSecond line\n'
Beispiel #9
0
 def test(self):
     # For each stored patch, the lines reported by Patch.parse_patch
     # are compared with those computed by self.get_touched
     revisions = (
         'c4c0ad8b3eaa', 'f045ac9f76cf', 'c58e9e70f971', 'd7a700707ddb',
         '81d3e4a2f3f3', '7e60ad275b73', 'f9b391e62608', '7dabae5e261a',
         'c6f9187b0b2e', 'd4f80c4ba719', 'b184c87f7606',
     )
     for revision in revisions:
         content = self.readfile('tests/patches/{}.patch'.format(revision))
         parsed = Patch.parse_patch(content, skip_comments=False)
         reference = self.get_touched(content)
         self.compare(parsed, reference)
Beispiel #10
0
def parse(chgset, channel="nightly", chunk_size=1000000):
    """
    Fetch and parse the patch of a changeset, keeping only interesting files.

    chgset: revision identifier handed to Patch.parse_changeset.
    channel: channel used to build the raw-revision url (default "nightly").
    chunk_size: kept for interface compatibility; not used by this function.

    Returns the value of Patch.parse_changeset. Any exception raised while
    parsing is logged and then propagated unchanged.
    """
    url = RawRevision.get_url(channel)
    logger.info("Get patch for revision {}".format(chgset))
    try:
        return Patch.parse_changeset(
            url, chgset, file_filter=utils.is_interesting_file, skip_comments=True
        )
    except Exception:
        msg = "Error in parsing patch with revision {}"
        logger.error(msg.format(chgset))
        # Re-raise the active exception as-is
        raise
Beispiel #11
0
    def test_new(self):
        # The only new file kept by the filter must be the screenshots
        # cropping test; with add_lines_for_new its lines 1 to 82 are
        # all reported as added
        new_file = 'browser/tools/mozscreenshots/browser_screenshots_cropping.js'

        def filt(f):
            return f.endswith(('jsm', 'js', 'ini'))

        content = self.readfile('tests/patches/b184c87f7606.patch')

        without_lines = Patch.parse_patch(content,
                                          skip_comments=False,
                                          file_filter=filt)
        for name, info in without_lines.items():
            if not info['new']:
                continue
            self.assertEqual(name, new_file)

        with_lines = Patch.parse_patch(content,
                                       skip_comments=False,
                                       add_lines_for_new=True,
                                       file_filter=filt)
        for name, info in with_lines.items():
            if not info['new']:
                continue
            self.assertEqual(name, new_file)
            self.assertEqual(info['added'], list(range(1, 83)))
Beispiel #12
0
def test_phabricator(mock_phabricator, mock_repository):
    '''
    Test a phabricator revision

    Checks the revision identifiers, then that applying the revision
    loads its patch and appends the second line to test.txt.
    '''
    from shipit_static_analysis.revisions import PhabricatorRevision
    from shipit_static_analysis.report.phabricator import PhabricatorReporter

    def read_file(path):
        # Close the handle right away instead of leaving it to the GC
        with open(path) as f:
            return f.read()

    api = PhabricatorReporter({
        'url': 'http://phabricator.test/api/',
        'api_key': 'deadbeef',
    })

    r = PhabricatorRevision('51:PHID-DIFF-testABcd12', api)
    assert not hasattr(r, 'mercurial')
    assert r.diff_id == 42
    assert r.diff_phid == 'PHID-DIFF-testABcd12'
    assert r.url == 'https://phabricator.test/PHID-DIFF-testABcd12/'
    assert r.build_diff_name() == 'PHID-DIFF-testABcd12-clang-format.diff'
    assert r.id == 51  # revision

    # Check test.txt content
    test_txt = os.path.join(mock_repository.directory, 'test.txt')
    assert read_file(test_txt) == 'Hello World\n'

    # Load full patch
    assert r.patch is None
    r.apply(mock_repository)
    assert r.patch is not None
    assert isinstance(r.patch, str)
    assert len(r.patch.split('\n')) == 7
    patch = Patch.parse_patch(r.patch)
    assert patch == {
        'test.txt': {
            'touched': [],
            'deleted': [],
            'added': [2],
            'new': False
        }
    }

    # Check file is updated
    assert read_file(test_txt) == 'Hello World\nSecond line\n'
Beispiel #13
0
    def analyze_files(self, repo):
        '''
        List the files and lines modified by the mercurial revision,
        store them on the instance and report their counts
        '''
        assert isinstance(repo, hglib.client.hgclient)

        # Get the parents revisions
        raw_parents = repo.identify(id=True,
                                    rev='parents({})'.format(self.mercurial))
        parents = raw_parents.decode('utf-8').strip()

        # Find modified files by this revision, one status call per parent
        self.files = []
        for parent in parents.split('\n'):
            status = repo.status(
                change=['{}:{}'.format(parent, self.mercurial)])
            self.files.extend([f.decode('utf-8') for _, f in status])
        logger.info('Modified files', files=self.files)

        # List all modified lines from current revision changes
        raw_diff = repo.diff(change=self.mercurial, git=True).decode('utf-8')
        parsed = Patch.parse_patch(raw_diff, skip_comments=False)

        # For each file: touched lines first, then added lines
        modified = {}
        for filename, changes in parsed.items():
            modified[filename] = changes.get('touched', []) + changes.get('added', [])
        self.lines = modified

        # Report nb of files and lines analyzed
        stats.api.increment('analysis.files', len(self.files))
        nb_lines = sum(map(len, self.lines.values()))
        stats.api.increment('analysis.lines', nb_lines)
Beispiel #14
0
    def analyze_patch(self):
        """
        Parse the loaded patch, store for every file the lines it
        touches or adds, and record the number of files and lines
        """
        raw_patch = self.patch
        assert raw_patch is not None, "Missing patch"
        assert isinstance(raw_patch, str), "Invalid patch type"

        # List all modified lines from current revision changes
        parsed = Patch.parse_patch(raw_patch, skip_comments=False)
        assert parsed != {}, "Empty patch"

        # For each file: touched lines first, then added lines
        modified = {}
        for filename, changes in parsed.items():
            modified[filename] = changes.get("touched", []) + changes.get("added", [])
        self.lines = modified

        # Shortcut to files modified
        self.files = self.lines.keys()

        # Report nb of files and lines analyzed
        stats.add_metric("analysis.files", len(self.files))
        nb_lines = sum(map(len, self.lines.values()))
        stats.add_metric("analysis.lines", nb_lines)
Beispiel #15
0
 def test_local(self):
     # Parse the patch stored on disk, hunks included, and compare it
     # with the reference result
     parsed = Patch.parse_file('tests/patches/8be2131ed183.patch',
                               get_hunks=True)
     self.assertEqual(parsed, self.knownCorrect)
Beispiel #16
0
    def run(self, revision):
        '''
        Run ./mach clang-format on all of the C/C++ files from the patch

        Returns the list of ClangFormatIssue built from the lines changed
        by clang-format. The list is empty when no file is eligible or
        when the formatting produced no diff on the allowed files.
        The forward diff is stored in self.diff and attached to the
        revision as the 'clang-format' improvement patch.
        '''
        assert isinstance(revision, Revision)

        cmd = ['gecko-env', './mach', '--log-no-times', 'clang-format', '-p']

        # Only the files with a C/C++ extension are clang-format compliant
        files_to_format = [
            path
            for path in revision.files
            if os.path.splitext(path)[1].lower() in settings.cpp_extensions
        ]

        if not files_to_format:
            logger.info('No eligible files found to format.')
            return []

        # Append to the cmd the files that will be formatted
        cmd += files_to_format

        # Run command and commit the current revision for `./mach clang-format ...` to reformat its changes
        logger.info('Running ./mach clang-format', cmd=' '.join(cmd))
        clang_output = subprocess.check_output(
            cmd, cwd=settings.repo_dir).decode('utf-8')

        # Dump raw clang-format output as a Taskcluster artifact (for debugging)
        clang_output_path = os.path.join(
            settings.taskcluster.results_dir,
            '{}-clang-format.txt'.format(repr(revision)),
        )
        with open(clang_output_path, 'w') as f:
            f.write(clang_output)

        # Look for any fixes `./mach clang-format` may have found
        # on allowed files
        allowed_paths = [
            os.path.join(settings.repo_dir,
                         path).encode('utf-8')  # needed for hglib
            for path in filter(settings.is_allowed_path, revision.files)
        ]
        client = hglib.open(settings.repo_dir)
        try:
            self.diff = client.diff(files=allowed_paths, unified=8).decode('utf-8')

            if not self.diff:
                return []

            # Store that diff as an improvement patch sent to devs
            revision.add_improvement_patch('clang-format', self.diff)

            # Generate a reverse diff for `parsepatch` (in order to get original
            # line numbers from the dev's patch instead of new line numbers)
            reverse_diff = client.diff(unified=8, reverse=True).decode('utf-8')
        finally:
            # Release the hglib client on every exit path
            client.close()

        # List all the lines that were fixed by `./mach clang-format`
        patch = Patch.parse_patch(reverse_diff, skip_comments=False)
        assert patch != {}, \
            'Empty patch'

        # Build `ClangFormatIssue`s
        issues = []
        for filename, diff in patch.items():
            lines = sorted(diff.get('touched', []) + diff.get('added', []))
            if not lines:
                # No touched/added line for this file: nothing to report,
                # and `lines[0]` below would raise IndexError
                continue

            # Group consecutive lines together (algorithm by calixte)
            groups = []
            group = [lines[0]]
            for line in lines[1:]:
                # If the line is not consecutive with the group, start a new
                # group
                if line != group[-1] + 1:
                    groups.append(group)
                    group = []
                group.append(line)

            # Don't forget to add the last group
            groups.append(group)

            # One issue per group: first line and number of lines
            issues += [
                ClangFormatIssue(filename, g[0], len(g), revision)
                for g in groups
            ]

        stats.report_issues('clang-format', issues)
        return issues
Beispiel #17
0
    def run(self, revision):
        '''
        Run ./mach clang-format on all of the C/C++ files from the patch

        Returns the list of ClangFormatIssue built from the lines changed
        by clang-format. The list is empty when no file is eligible or
        when the formatting produced no diff on the allowed files.
        The forward diff is stored in self.diff and attached to the
        revision as the 'clang-format' improvement patch.
        '''
        assert isinstance(revision, Revision)

        cmd = [
            'gecko-env',
            './mach', '--log-no-times', 'clang-format', '-p'
        ]

        # Returns a list of eligible files for format
        def get_eligible_files():
            # Source and header extensions, merged once instead of on
            # every file
            extensions = frozenset.union(settings.cpp_extensions, settings.cpp_header_extensions)
            return [
                path
                for path in revision.files
                if os.path.splitext(path)[1].lower() in extensions
            ]

        files_to_format = get_eligible_files()

        if not files_to_format:
            logger.info('No eligible files found to format.')
            return []

        # Append to the cmd the files that will be formatted
        cmd += files_to_format

        # Run command and commit the current revision for `./mach clang-format ...` to reformat its changes
        logger.info('Running ./mach clang-format', cmd=' '.join(cmd))
        clang_output = subprocess.check_output(
            cmd, cwd=settings.repo_dir).decode('utf-8')

        # Dump raw clang-format output as a Taskcluster artifact (for debugging)
        clang_output_path = os.path.join(
            settings.taskcluster.results_dir,
            '{}-clang-format.txt'.format(repr(revision)),
        )
        with open(clang_output_path, 'w') as f:
            f.write(clang_output)

        # Look for any fixes `./mach clang-format` may have found
        # on allowed files
        allowed_paths = [
            os.path.join(settings.repo_dir, path).encode('utf-8')  # needed for hglib
            for path in filter(settings.is_allowed_path, revision.files)
        ]
        client = hglib.open(settings.repo_dir)
        try:
            self.diff = client.diff(files=allowed_paths, unified=8).decode('utf-8')

            if not self.diff:
                return []

            # Store that diff as an improvement patch sent to devs
            revision.add_improvement_patch('clang-format', self.diff)

            # Generate a reverse diff for `parsepatch` (in order to get original
            # line numbers from the dev's patch instead of new line numbers)
            reverse_diff = client.diff(unified=8, reverse=True).decode('utf-8')
        finally:
            # Release the hglib client on every exit path
            client.close()

        # List all the lines that were fixed by `./mach clang-format`
        patch = Patch.parse_patch(reverse_diff, skip_comments=False)
        assert patch != {}, \
            'Empty patch'

        # Build `ClangFormatIssue`s
        issues = []
        for filename, diff in patch.items():
            lines = sorted(diff.get('touched', []) + diff.get('added', []))
            if not lines:
                # No touched/added line for this file: nothing to report,
                # and `lines[0]` below would raise IndexError
                continue

            # Group consecutive lines together (algorithm by calixte)
            groups = []
            group = [lines[0]]
            for line in lines[1:]:
                # If the line is not consecutive with the group, start a new
                # group
                if line != group[-1] + 1:
                    groups.append(group)
                    group = []
                group.append(line)

            # Don't forget to add the last group
            groups.append(group)

            # One issue per group: first line and number of lines
            issues += [
                ClangFormatIssue(filename, g[0], len(g), revision)
                for g in groups
            ]

        stats.report_issues('clang-format', issues)
        return issues
Beispiel #18
0
    def run(self, revision):
        '''
        Run the static analysis workflow:
         * Pull revision from review
         * Checkout revision
         * Run static analysis
         * Publish results

        `revision` must carry a mercurial changeset in `revision.mercurial`.
        Nothing is returned: the detected issues are handed to every
        reporter in `self.reporters`, unless no issue was found.
        '''
        assert revision.mercurial is not None, \
            'Cannot run without a mercurial revision'

        # Add log to find Taskcluster task in papertrail
        logger.info(
            'New static analysis',
            taskcluster_task=self.taskcluster_task_id,
            taskcluster_run=self.taskcluster_run_id,
            channel=settings.app_channel,
            revision=revision,
        )

        # Setup tools (clang & mozlint)
        # Each name is False when its analyzer is not enabled in
        # self.analyzers, otherwise the tool instance
        clang_tidy = CLANG_TIDY in self.analyzers and ClangTidy(
            self.repo_dir, settings.target)
        clang_format = CLANG_FORMAT in self.analyzers and ClangFormat(
            self.repo_dir)
        mozlint = MOZLINT in self.analyzers and MozLint(self.repo_dir)

        # Force cleanup to reset tip
        # otherwise previous pull are there
        self.hg.update(rev=b'tip', clean=True)

        # Pull revision from review
        self.hg.pull(source=REPO_REVIEW,
                     rev=revision.mercurial,
                     update=True,
                     force=True)

        # Update to the target revision
        self.hg.update(rev=revision.mercurial, clean=True)

        # Get the parents revisions
        parent_rev = 'parents({})'.format(revision.mercurial)
        parents = self.hg.identify(id=True,
                                   rev=parent_rev).decode('utf-8').strip()

        # Find modified files by this revision
        # (one status call per parent, the identify output being split on newlines)
        modified_files = []
        for parent in parents.split('\n'):
            changeset = '{}:{}'.format(parent, revision.mercurial)
            status = self.hg.status(change=[
                changeset,
            ])
            modified_files += [f.decode('utf-8') for _, f in status]
        logger.info('Modified files', files=modified_files)

        # List all modified lines from current revision changes
        patch = Patch.parse_patch(
            self.hg.diff(change=revision.mercurial, git=True).decode('utf-8'),
            skip_comments=False,
        )
        # Maps each filename to its touched lines followed by its added lines
        modified_lines = {
            # Use all changes in new files
            filename: diff.get('touched', []) + diff.get('added', [])
            for filename, diff in patch.items()
        }

        # mach configure with mozconfig
        logger.info('Mach configure...')
        run_check(['gecko-env', './mach', 'configure'], cwd=self.repo_dir)

        # Build CompileDB backend
        logger.info('Mach build backend...')
        cmd = ['gecko-env', './mach', 'build-backend', '--backend=CompileDB']
        run_check(cmd, cwd=self.repo_dir)

        # Build exports
        logger.info('Mach build exports...')
        run_check(['gecko-env', './mach', 'build', 'pre-export'],
                  cwd=self.repo_dir)
        run_check(['gecko-env', './mach', 'build', 'export'],
                  cwd=self.repo_dir)

        # Run static analysis through clang-tidy
        issues = []
        if clang_tidy:
            logger.info('Run clang-tidy...')
            issues += clang_tidy.run(settings.clang_checkers, modified_lines)
        else:
            logger.info('Skip clang-tidy')

        # Run clang-format on modified files
        # diff_url stays None unless clang-format patched some files
        diff_url = None
        if clang_format:
            logger.info('Run clang-format...')
            format_issues, patched = clang_format.run(settings.cpp_extensions,
                                                      modified_lines)
            issues += format_issues
            if patched:
                # Get current diff on these files
                logger.info('Found clang-format issues', files=patched)
                # Absolute paths encoded as bytes before being given to hg
                files = list(
                    map(
                        lambda x: os.path.join(self.repo_dir, x).encode(
                            'utf-8'), patched))
                diff = self.hg.diff(files)
                assert diff is not None and diff != b'', \
                    'Empty diff'

                # Write diff in results directory
                diff_path = os.path.join(self.taskcluster_results_dir,
                                         revision.build_diff_name())
                with open(diff_path, 'w') as f:
                    length = f.write(diff.decode('utf-8'))
                    logger.info('Diff from clang-format dumped',
                                path=diff_path,
                                length=length)  # noqa

                # Build diff download url
                diff_url = ARTIFACT_URL.format(
                    task_id=self.taskcluster_task_id,
                    run_id=self.taskcluster_run_id,
                    diff_name=revision.build_diff_name(),
                )
                logger.info('Diff available online', url=diff_url)
            else:
                logger.info('No clang-format issues')

        else:
            logger.info('Skip clang-format')

        # Run linter
        if mozlint:
            logger.info('Run mozlint...')
            issues += mozlint.run(modified_lines)
        else:
            logger.info('Skip mozlint')

        logger.info('Detected {} issue(s)'.format(len(issues)))
        if not issues:
            logger.info('No issues, stopping there.')
            return

        # Publish reports about these issues
        for reporter in self.reporters.values():
            reporter.publish(issues, revision, diff_url)
Beispiel #19
0
 def test_diff_r(self):
     # The lines reported by Patch.parse_patch for this patch are
     # compared with those computed by self.get_touched
     content = self.readfile('tests/patches/janx.patch')
     parsed = Patch.parse_patch(content, skip_comments=False)
     reference = self.get_touched(content)
     self.compare(parsed, reference)
Beispiel #20
0
 def test_remote(self):
     # Parse the changeset from its raw-rev url, hunks included, and
     # compare it with the reference result
     base_url = 'https://hg.mozilla.org/mozilla-central/raw-rev'
     parsed = Patch.parse_changeset(base_url, '8be2131ed183', get_hunks=True)
     self.assertEqual(parsed, self.knownCorrect)
Beispiel #21
0
def _transform(commit):
    """
    Turn a commit into a flat dict of features.

    Reads the commit metadata, looks up the experience counters stored in the
    module-level `experiences_by_commit` for `commit.node`, then exports the
    commit's patch through `HG` in order to:

    - count added/deleted lines (test files, as decided by `is_test`, are
      counted under the `test_*` keys; touched lines count as both an
      addition and a deletion);
    - collect the type of every touched file and the components they map to
      in `path_to_component`;
    - measure the size (number of newline bytes) of every non-binary touched
      file as it is at this revision.

    Args:
        commit: object exposing `node`, `author`, `author_email`, `desc`,
            `bug` and `backedoutby` as bytes, plus `reviewers`, `date` and
            `pushdate`.

    Returns:
        dict: the features. `types` and `components` are returned as lists so
        the dict can be serialized to JSON.

    Raises:
        hglib.error.CommandError: if `HG.cat` fails for any reason other than
            the file not existing in the revision.
    """
    desc = commit.desc.decode("utf-8")

    obj = {
        "node": commit.node.decode("utf-8"),
        "author": commit.author.decode("utf-8"),
        "reviewers": commit.reviewers,
        "desc": desc,
        "date": str(commit.date),
        "pushdate": str(commit.pushdate),
        "bug_id": int(commit.bug.decode("utf-8")) if commit.bug else None,
        "ever_backedout": commit.backedoutby != b"",
        "added": 0,
        "test_added": 0,
        "deleted": 0,
        "test_deleted": 0,
        "files_modified_num": 0,
        "types": set(),
        "components": list(),
        "author_experience": experiences_by_commit["total"]["author"][commit.node],
        f"author_experience_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["author"][commit.node],
        "reviewer_experience": experiences_by_commit["total"]["reviewer"][commit.node],
        f"reviewer_experience_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["reviewer"][commit.node],
        "author_email": commit.author_email.decode("utf-8"),
        "components_touched_prev": experiences_by_commit["total"]["component"][
            commit.node
        ],
        f"components_touched_prev_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["component"][commit.node],
        "files_touched_prev": experiences_by_commit["total"]["file"][commit.node],
        f"files_touched_prev_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["file"][commit.node],
        "directories_touched_prev": experiences_by_commit["total"]["directory"][
            commit.node
        ],
        f"directories_touched_prev_{EXPERIENCE_TIMESPAN_TEXT}": experiences_by_commit[
            EXPERIENCE_TIMESPAN_TEXT
        ]["directory"][commit.node],
    }

    # One entry per non-binary file touched by the commit.
    sizes = []

    patch = HG.export(revs=[commit.node], git=True)
    patch_data = Patch.parse_patch(
        patch.decode("utf-8", "ignore"), skip_comments=False, add_lines_for_new=True
    )
    for path, stats in patch_data.items():
        if "added" not in stats:
            # Must be a binary file
            obj["types"].add("binary")
            continue

        if is_test(path):
            obj["test_added"] += len(stats["added"]) + len(stats["touched"])
            obj["test_deleted"] += len(stats["deleted"]) + len(stats["touched"])
        else:
            obj["added"] += len(stats["added"]) + len(stats["touched"])
            obj["deleted"] += len(stats["deleted"]) + len(stats["touched"])

        ext = os.path.splitext(path)[1]
        if ext in [".js", ".jsm"]:
            type_ = "JavaScript"
        elif ext in [
            ".c",
            ".cpp",
            ".cc",
            ".cxx",
            ".m",
            ".mm",
            ".h",
            ".hh",
            ".hpp",
            ".hxx",
        ]:
            type_ = "C/C++"
        elif ext == ".java":
            type_ = "Java"
        elif ext == ".py":
            type_ = "Python"
        elif ext == ".rs":
            type_ = "Rust"
        else:
            # Unknown extension: use it verbatim as the type.
            type_ = ext
        obj["types"].add(type_)

        try:
            after = HG.cat([path.encode("utf-8")], rev=commit.node)
        except hglib.error.CommandError as e:
            # The file does not exist at this revision: count it as empty.
            if b"no such file in rev" in e.err:
                after = b""
            else:
                raise

        sizes.append(after.count(b"\n"))

    # `sizes` is empty when the commit touches no file or only binary files;
    # every aggregate falls back to 0 in that case instead of raising.
    obj["total_file_size"] = sum(sizes)
    obj["average_file_size"] = obj["total_file_size"] / len(sizes) if sizes else 0
    obj["maximum_file_size"] = max(sizes, default=0)
    obj["minimum_file_size"] = min(sizes, default=0)

    obj["files_modified_num"] = len(patch_data)

    # Convert to a list, as a set is not JSON-serializable.
    obj["types"] = list(obj["types"])

    # De-duplicate the components; paths that are unknown to
    # `path_to_component` (or map to a falsy value) are left out.
    obj["components"] = list(
        {
            path_to_component[path]
            for path in patch_data
            if path_to_component.get(path)
        }
    )

    return obj
Beispiel #22
0
    def run(self, revision):
        '''
        Run ./mach clang-format on the current patch

        The raw command output is dumped in the Taskcluster results directory,
        then the working directory diff left by the command is read through
        hglib. That diff is stored on `self.diff` and attached to the revision
        as the 'clang-format' improvement patch. The reverse diff is parsed to
        list the lines that were changed, and consecutive lines are grouped
        into one issue each.

        Returns the list of `ClangFormatIssue` built (empty when the diff is
        empty). Raises `subprocess.CalledProcessError` when the command exits
        with a non-zero status, and `AssertionError` when `revision` is not a
        `Revision` or when the reverse diff parses to an empty patch.
        '''
        assert isinstance(revision, Revision)

        # Commit the current revision for `./mach clang-format` to reformat its changes
        cmd = [
            'gecko-env',
            './mach',
            '--log-no-times',
            'clang-format',
        ]
        logger.info('Running ./mach clang-format', cmd=' '.join(cmd))

        # Run command
        clang_output = subprocess.check_output(
            cmd, cwd=settings.repo_dir).decode('utf-8')

        # Dump raw clang-format output as a Taskcluster artifact (for debugging)
        clang_output_path = os.path.join(
            settings.taskcluster.results_dir,
            '{}-clang-format.txt'.format(repr(revision)),
        )
        with open(clang_output_path, 'w') as f:
            f.write(clang_output)

        # Look for any fixes `./mach clang-format` may have found
        client = hglib.open(settings.repo_dir)
        try:
            self.diff = client.diff(unified=8).decode('utf-8')

            if not self.diff:
                return []

            # Store that diff as an improvement patch sent to devs
            revision.add_improvement_patch('clang-format', self.diff)

            # Generate a reverse diff for `parsepatch` (in order to get original
            # line numbers from the dev's patch instead of new line numbers)
            reverse_diff = client.diff(unified=8, reverse=True).decode('utf-8')
        finally:
            # Always release the hglib client, whatever happened above
            client.close()

        # List all the lines that were fixed by `./mach clang-format`
        patch = Patch.parse_patch(reverse_diff, skip_comments=False)
        assert patch != {}, \
            'Empty patch'

        # Build `ClangFormatIssue`s
        issues = []
        for filename, diff in patch.items():
            lines = sorted(diff.get('touched', []) + diff.get('added', []))
            if not lines:
                # No touched/added line for this file in the reverse diff:
                # there is no line to anchor an issue on, and `lines[0]`
                # below would raise an IndexError
                continue

            # Group consecutive lines together (algorithm by calixte)
            groups = []
            group = [lines[0]]
            for line in lines[1:]:
                # If the line is not consecutive with the group, start a new
                # group
                if line != group[-1] + 1:
                    groups.append(group)
                    group = []
                group.append(line)

            # Don't forget to add the last group
            groups.append(group)

            # One issue per group: first line of the group and number of lines
            issues += [
                ClangFormatIssue(filename, group[0], len(group), revision)
                for group in groups
            ]

        stats.report_issues('clang-format', issues)
        return issues
Beispiel #23
0
def _transform(commit):
    """
    Summarize a commit as a dict of features.

    Combines the commit metadata with the per-node counters kept in the
    module-level experience tables, then walks the commit's exported patch to
    count added/deleted lines (test files, as decided by `is_test`, go to the
    `test_*` keys), record the type of every touched file and resolve the
    components through `path_to_component`.

    Returns the dict; `types` and `components` are lists.
    """
    # Known source extensions and the type name reported for them; any other
    # extension is reported verbatim.
    known_types = {
        ".js": "JavaScript",
        ".jsm": "JavaScript",
        ".c": "C/C++",
        ".cpp": "C/C++",
        ".cc": "C/C++",
        ".cxx": "C/C++",
        ".m": "C/C++",
        ".mm": "C/C++",
        ".h": "C/C++",
        ".hh": "C/C++",
        ".hpp": "C/C++",
        ".hxx": "C/C++",
        ".java": "Java",
        ".py": "Python",
        ".rs": "Rust",
    }

    description = commit.desc.decode("utf-8")
    node = commit.node
    seen_types = set()

    obj = {
        "author": commit.author.decode("utf-8"),
        "desc": description,
        "date": str(commit.date),
        "bug_id": int(commit.bug.decode("utf-8")) if commit.bug else None,
        "ever_backedout": commit.backedoutby != b"",
        "added": 0,
        "test_added": 0,
        "deleted": 0,
        "test_deleted": 0,
        "files_modified_num": 0,
        "types": seen_types,
        "components": [],
        "author_experience": author_experience[node],
        "author_experience_90_days": author_experience_90_days[node],
        "author_email": commit.author_email.decode("utf-8"),
        "components_touched_prev": components_touched_prev[node],
        "components_touched_prev_90_days": components_touched_prev_90_days[node],
        "files_touched_prev": files_touched_prev[node],
        "files_touched_prev_90_days": files_touched_prev_90_days[node],
    }

    raw_patch = HG.export(revs=[node], git=True)
    parsed = Patch.parse_patch(
        raw_patch.decode("utf-8", "ignore"),
        skip_comments=False,
        add_lines_for_new=True,
    )

    for path, stats in parsed.items():
        # Entries without an "added" list are treated as binary files.
        if "added" not in stats:
            seen_types.add("binary")
            continue

        if is_test(path):
            added_key, deleted_key = "test_added", "test_deleted"
        else:
            added_key, deleted_key = "added", "deleted"

        # Touched lines count both as an addition and as a deletion.
        n_added = len(stats["added"])
        n_touched = len(stats["touched"])
        n_deleted = len(stats["deleted"])
        obj[added_key] += n_added + n_touched
        obj[deleted_key] += n_deleted + n_touched

        _, ext = os.path.splitext(path)
        seen_types.add(known_types.get(ext, ext))

    obj["files_modified_num"] = len(parsed)

    # Convert to a list, as a set is not JSON-serializable.
    obj["types"] = list(seen_types)

    # De-duplicated components of the touched paths; paths that are unknown
    # to `path_to_component` (or map to a falsy value) are left out.
    components = {
        path_to_component[path]
        for path in parsed
        if path_to_component.get(path)
    }
    obj["components"] = list(components)

    return obj