def compile(self) -> None:
    """Compile the project."""
    brotli_version_source = local.path(self.source_of_primary)
    brotli_git_path = get_local_project_git_path(self.NAME)
    brotli_version = ShortCommitHash(self.version_of_primary)
    with local.cwd(brotli_git_path):
        # Brotli changed its build system over time; these two revision
        # ranges identify the older build-system eras so the matching
        # build commands can be selected below.
        configure_revisions = get_all_revisions_between(
            "f9ab24a7aaee93d5932ba212e5e3d32e4306f748",
            "5814438791fb2d4394b46e5682a96b68cd092803", ShortCommitHash)
        simple_make_revisions = get_all_revisions_between(
            "e1739826c04a9944672b99b98249dda021bdeb36",
            "378485b097fd7b80a5e404a3cb912f7b18f78cdb", ShortCommitHash)
    c_compiler = bb.compiler.cc(self)
    if brotli_version in simple_make_revisions:
        # Oldest era: plain `make` inside the tools directory.
        with local.cwd(brotli_version_source / "tools"):
            bb.watch(make)("-j", get_number_of_jobs(bb_cfg()))
    elif brotli_version in configure_revisions:
        # Middle era: ./configure (run with the instrumented CC) + make.
        with local.cwd(brotli_version_source):
            with local.env(CC=str(c_compiler)):
                bb.watch(local["./configure"])()
            bb.watch(make)("-j", get_number_of_jobs(bb_cfg()))
    else:
        # Current era: cmake wrapper script run from an out-of-tree
        # "out" build directory.
        mkdir(brotli_version_source / "out")
        with local.cwd(brotli_version_source / "out"):
            with local.env(CC=str(c_compiler)):
                bb.watch(local["../configure-cmake"])()
            bb.watch(make)("-j", get_number_of_jobs(bb_cfg()))
    with local.cwd(brotli_version_source):
        verify_binaries(self)
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the wrapped bzip2 executable valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(Bzip2.NAME))
    rev_map.specify_binary('build/bzip2', BinaryType.EXECUTABLE)
    return rev_map[revision]
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the brotli binaries valid for ``revision``.

    The binary moved and was renamed several times over the project's
    history, so each path is only registered for the revision range in
    which it exists.
    """
    rev_map = RevisionBinaryMap(get_local_project_git_path(Brotli.NAME))
    binary_ranges = (
        ("out/brotli", "03739d2b113afe60638069c4e1604dc2ac27380d", "HEAD"),
        ("out/bro", "5814438791fb2d4394b46e5682a96b68cd092803",
         "03739d2b113afe60638069c4e1604dc2ac27380d"),
        ("bin/bro", "f9ab24a7aaee93d5932ba212e5e3d32e4306f748",
         "5814438791fb2d4394b46e5682a96b68cd092803"),
        ("tools/bro", "e1739826c04a9944672b99b98249dda021bdeb36",
         "378485b097fd7b80a5e404a3cb912f7b18f78cdb"),
    )
    for binary_path, first_rev, last_rev in binary_ranges:
        rev_map.specify_binary(
            binary_path,
            BinaryType.EXECUTABLE,
            only_valid_in=RevisionRange(first_rev, last_rev))
    return rev_map[revision]
def get_commit_map(project_name: str,
                   cmap_path: tp.Optional[Path] = None,
                   end: str = "HEAD",
                   start: tp.Optional[str] = None) -> CommitMap:
    """
    Get a commit map for a project.

    Range of commits that get included in the map: `]start..end]`

    Args:
        project_name: name of the project
        cmap_path: path to a existing commit map file
        end: last commit that should be included in the map
        start: commit before the first commit that should be included in
               the map

    Returns:
        a bidirectional commit map from commits to time IDs
    """
    # A pre-computed commit-map file takes precedence over generating one.
    if cmap_path is not None:
        return load_commit_map_from_path(cmap_path)

    project_git_path = get_local_project_git_path(project_name)
    primary_source = get_primary_project_source(project_name)
    # Sources without an explicit refspec fall back to "HEAD".
    refspec = getattr(primary_source, "refspec", "HEAD")
    return generate_commit_map(project_git_path, end, start, refspec)
def compile(self) -> None:
    """Compile the project."""
    xz_git_path = get_local_project_git_path(self.NAME)
    xz_version_source = local.path(self.source_of_primary)
    # FIX: wrap in ShortCommitHash so the membership test against the
    # List[ShortCommitHash] below compares hash-to-hash; a plain str
    # would never match (see the brotli project's compile for the same
    # pattern).
    xz_version = ShortCommitHash(self.version_of_primary)

    # dynamic linking is off by default until
    # commit f9907503f882a745dce9d84c2968f6c175ba966a
    # (fda4724 is its parent)
    with local.cwd(xz_git_path):
        revisions_wo_dynamic_linking = get_all_revisions_between(
            "5d018dc03549c1ee4958364712fb0c94e1bf2741",
            "fda4724d8114fccfa31c1839c15479f350c2fb4c", ShortCommitHash)

    self.cflags += ["-fPIC"]

    clang = bb.compiler.cc(self)
    with local.cwd(xz_version_source):
        with local.env(CC=str(clang)):
            bb.watch(autoreconf)("--install")
            configure = bb.watch(local["./configure"])
            # Old revisions need dynamic linking requested explicitly.
            if xz_version in revisions_wo_dynamic_linking:
                configure("--enable-dynamic=yes")
            else:
                configure()
        bb.watch(make)("-j", get_number_of_jobs(bb_cfg()))
        verify_binaries(self)
def __gen_sample(ctx: click.Context, distribution: str, end: str, start: str,
                 num_rev: int, only_code_commits: bool) -> None:
    """
    Add revisions based on a sampling Distribution.

    Distribution: The sampling method to use
    """
    sampler_cls = NormalSamplingMethod.get_sampling_method_type(distribution)
    sampling_method: NormalSamplingMethod = sampler_cls()

    repo_path = get_local_project_git_path(ctx.obj['project'])
    # Resolve timestamps to concrete commit hashes.
    if end != "HEAD" and not is_commit_hash(end):
        end = get_commits_before_timestamp(end, repo_path)[0].hash
    if start is not None and not is_commit_hash(start):
        predecessors = get_commits_before_timestamp(start, repo_path)
        if predecessors:
            start = predecessors[0].hash
        else:
            # No commit exists before the given timestamp; use the very
            # first commit of the repository instead.
            start = get_initial_commit(repo_path).hash

    cmap = create_lazy_commit_map_loader(ctx.obj['project'], None, end,
                                         start)()
    extend_with_distrib_sampling(ctx.obj['case_study'], cmap, sampling_method,
                                 ctx.obj['merge_stage'], num_rev,
                                 ctx.obj['ignore_blocked'], only_code_commits)
    store_case_study(ctx.obj['case_study'], ctx.obj['path'])
def get_submodule_head(project_name: str, submodule_name: str,
                       commit: FullCommitHash) -> FullCommitHash:
    """
    Retrieve the checked out commit for a submodule of a project.

    Args:
        project_name: name of the project
        submodule_name: name of the submodule
        commit: commit of the project's main repo

    Returns:
        checked out commit of the submodule

    Raises:
        AssertionError: if no submodule with that name exists at ``commit``
    """
    # The primary source is not a submodule; its checked out commit is the
    # main-repo commit itself.
    if submodule_name == get_primary_project_source(project_name).local:
        return commit

    main_repo = get_local_project_git_path(project_name)
    submodule_status = git(__get_git_path_arg(main_repo), "ls-tree", commit)
    # ls-tree submodule lines look like "<mode> commit <sha>\t<name>".
    # FIX: escape the submodule name so regex metacharacters in it cannot
    # corrupt the pattern, and anchor at end-of-line so a submodule whose
    # name is a prefix of another submodule's name cannot match it.
    commit_pattern = re.compile(r"[0-9]* commit ([0-9abcdef]*)\t" +
                                re.escape(submodule_name) + r"(?=\n|$)")
    match = commit_pattern.search(submodule_status)
    if match:
        return FullCommitHash(match.group(1))
    raise AssertionError(f"Unknown submodule {submodule_name}")
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the wrapped file executable valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(File.NAME))
    rev_map.specify_binary("src/.libs/file", BinaryType.EXECUTABLE)
    return rev_map[revision]
def revisions_of_project(self) -> None:
    """Generate the Revision list for the selected project if select specific
    is enabled."""
    self.strategie_forms.setCurrentIndex(
        GenerationStrategie.SELECT_REVISION.value)
    # Only (re)load the revision list when the selected project changed;
    # otherwise the previously built model is kept.
    if self.selected_project != self.revision_list_project:
        self.revision_details.setText("Loading Revisions")
        self.revision_details.repaint()
        # Update the local clone before listing revisions.
        get_local_project_git(self.selected_project).remotes[0].fetch()
        git_path = get_local_project_git_path(self.selected_project)
        initial_commit = get_initial_commit(git_path).hash
        commits = get_all_revisions_between(initial_commit, 'HEAD',
                                            FullCommitHash, git_path)
        commit_lookup_helper = create_commit_lookup_helper(
            self.selected_project)
        project = get_project_cls_by_name(self.selected_project)
        repo_name = get_primary_project_source(self.selected_project).local
        # Pair each commit with the primary repo so it can be resolved by
        # the commit lookup helper below.
        commits = map(lambda commit: CommitRepoPair(commit, repo_name),
                      commits)
        cmap = get_commit_map(self.selected_project)
        commit_model = CommitTableModel(
            list(map(commit_lookup_helper, commits)), cmap, project)
        self.proxy_model.setSourceModel(commit_model)
        self.revision_list_project = self.selected_project
        self.revision_details.clear()
        self.revision_details.update()
def gen(self) -> None:
    """Generate the case study using the selected strategy, project and
    strategy specific arguments."""
    cmap = create_lazy_commit_map_loader(self.selected_project, None, 'HEAD',
                                         None)()
    version = self.cs_version.value()
    case_study = CaseStudy(self.revision_list_project, version)
    paper_config = vara_cfg()["paper_config"]["current_config"].value
    # Target file: <paper-config folder>/<config>/<project>_<version>.case_study
    path = Path(vara_cfg()["paper_config"]["folder"].value) / (
        paper_config + f"/{self.revision_list_project}_{version}.case_study")
    if self.strategie_forms.currentIndex(
    ) == GenerationStrategie.SAMPLE.value:
        sampling_method = NormalSamplingMethod.get_sampling_method_type(
            self.sampling_method.currentText())
        # NOTE(review): ``self.code_commits.clicked`` is a Qt signal object
        # (always truthy) passed where a bool (only_code_commits) seems
        # expected — presumably ``isChecked()`` was intended; confirm.
        extend_with_distrib_sampling(case_study, cmap, sampling_method(), 0,
                                     self.num_revs.value(), True,
                                     self.code_commits.clicked)
    elif self.strategie_forms.currentIndex(
    ) == GenerationStrategie.SELECT_REVISION.value:
        selected_rows = self.revision_list.selectionModel().selectedRows(0)
        selected_commits = [row.data() for row in selected_rows]
        extend_with_extra_revs(case_study, cmap, selected_commits, 0)
        self.revision_list.clearSelection()
        self.revision_list.update()
    elif self.strategie_forms.currentIndex(
    ) == GenerationStrategie.REVS_PER_YEAR.value:
        # NOTE(review): ``checkState()`` returns a Qt CheckState enum, not a
        # plain bool — verify the callee treats it as intended.
        extend_with_revs_per_year(
            case_study, cmap, 0, True,
            get_local_project_git_path(self.selected_project),
            self.revs_per_year.value(), self.seperate.checkState())
    store_case_study(case_study, path)
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the glib shared library valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(Glib.NAME))
    rev_map.specify_binary('build/glib/libglib-2.0.so',
                           BinaryType.SHARED_LIBRARY)
    return rev_map[revision]
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the libjpeg-turbo shared library valid for ``revision``."""
    rev_map = RevisionBinaryMap(
        get_local_project_git_path(LibjpegTurbo.NAME))
    rev_map.specify_binary("libjpeg.so", BinaryType.SHARED_LIBRARY)
    return rev_map[revision]
def test_get_initial_commit_with_specified_path(self) -> None:
    """Check if we can correctly retrieve the initial commit of a repo."""
    repo_path = get_local_project_git_path("FeaturePerfCSCollection")
    self.assertEqual(
        FullCommitHash("4d84c8f80ec2db3aaa880d323f7666752c4be51d"),
        get_initial_commit(repo_path))
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the irssi executable valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(Irssi.NAME))
    rev_map.specify_binary('./src/fe-text/irssi', BinaryType.EXECUTABLE)
    return rev_map[revision]
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the busybox binaries valid for ``revision``.

    The binary path is still a placeholder and has to be filled in.
    """
    rev_map = RevisionBinaryMap(get_local_project_git_path(Busybox.NAME))
    rev_map.specify_binary("PLEASE_REPLACE_ME", BinaryType.EXECUTABLE)
    return rev_map[revision]
def binaries_for_revision(
        revision: ShortCommitHash  # pylint: disable=W0613
) -> tp.List[ProjectBinaryWrapper]:
    """Return the libvpx encoder and decoder executables for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(Libvpx.NAME))
    for executable in ("vpxdec", "vpxenc"):
        rev_map.specify_binary(executable, BinaryType.EXECUTABLE)
    return rev_map[revision]
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the mongodb binaries valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(MongoDB.NAME))
    # TODO: please add correct binary names
    rev_map.specify_binary("MISSING", BinaryType.EXECUTABLE)
    return rev_map[revision]
def __init__(self, project: tp.Type[Project]) -> None:
    """Initialize the provider, building the CVE map for ``project``.

    Raises:
        ValueError: if the project does not implement ``CVEProviderHook``
    """
    super().__init__(project)
    # Only projects that declare their CVE product info can be handled.
    if not issubclass(project, CVEProviderHook):
        raise ValueError(f"Project {project} does not implement "
                         f"CVEProviderHook.")
    project_name = project.NAME
    self.__cve_map: CVEDict = generate_cve_map(
        get_local_project_git_path(project_name),
        project.get_cve_product_info())
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the qemu system emulator executable valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(Qemu.NAME))
    rev_map.specify_binary("build/x86_64-softmmu/qemu-system-x86_64",
                           BinaryType.EXECUTABLE)
    return rev_map[revision]
def create_graph() -> nx.DiGraph:
    # Build a blame-interaction graph: for every file of every repo of
    # the project, commits that share blamed lines in the same file get
    # pairwise edges whose "amount" counts co-occurring files.
    repos = get_local_project_gits(self.project_name)
    interaction_graph = nx.DiGraph()
    churn_config = ChurnConfig.create_c_style_languages_config()
    # Matches file extensions of the configured C-style languages.
    file_pattern = re.compile(r"|".join(
        churn_config.get_extensions_repr(prefix=r"\.", suffix=r"$")))
    # git blame (-s -l) line: "<full sha> <lineno>) <content>";
    # group(1) = commit hash, group(2) = line content.
    blame_regex = re.compile(r"^([0-9a-f]+)\s+(?:.+\s+)?[\d]+\) ?(.*)$")
    for repo_name in repos:
        repo_path = get_local_project_git_path(self.project_name, repo_name)
        project_git = git["-C", str(repo_path)]
        head_commit = get_submodule_head(self.project_name, repo_name,
                                         self.__head_commit)
        file_names = project_git("ls-tree", "--full-tree", "--name-only",
                                 "-r", head_commit).split("\n")
        # Keep only files of the configured languages.
        files: tp.List[Path] = [
            repo_path / path
            for path in file_names
            if file_pattern.search(path)
        ]
        for file in files:
            # All commits that own at least one non-empty line of `file`.
            nodes: tp.Set[BIGNodeTy] = set()
            blame_lines: str = project_git(
                "blame", "-w", "-s", "-l", "--root", head_commit, "--",
                str(file.relative_to(repo_path)))
            for line in blame_lines.strip().split("\n"):
                match = blame_regex.match(line)
                if not match:
                    # Every blame line must match the expected format.
                    raise AssertionError
                # Skip blank lines (empty content group).
                if match.group(2):
                    nodes.add(
                        BlameTaintData(
                            CommitRepoPair(FullCommitHash(match.group(1)),
                                           repo_name)))
            for node in nodes:
                interaction_graph.add_node(node, blame_taint_data=node)
            # Fully connect all commits of this file (both directions),
            # accumulating how many files each pair shares.
            for commit_a, commit_b in itertools.product(nodes, repeat=2):
                if commit_a != commit_b:
                    if not interaction_graph.has_edge(commit_a, commit_b):
                        interaction_graph.add_edge(commit_a, commit_b,
                                                   amount=0)
                    interaction_graph[commit_a][commit_b]["amount"] += 1
    return interaction_graph
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the elementalist test program valid for ``revision``."""
    rev_map = RevisionBinaryMap(
        get_local_project_git_path(
            TwoLibsOneProjectInteractionDiscreteLibsSingleProject.NAME))
    binary_path = (local.path("revision") /
                   "build/test_prog/elementalist/elementalist")
    rev_map.specify_binary(binary_path, BinaryType.EXECUTABLE)
    return rev_map[revision]
def binaries_for_revision(
        revision: ShortCommitHash) -> tp.List[ProjectBinaryWrapper]:
    """Return the redis server executable valid for ``revision``."""
    rev_map = RevisionBinaryMap(get_local_project_git_path(Redis.NAME))
    rev_map.specify_binary('src/redis-server',
                           BinaryType.EXECUTABLE,
                           override_binary_name='redis_server')
    return rev_map[revision]
def test_get_submodule_head_main_repo(self):
    """Check if correct main repo commit is retrieved."""
    repo_path = get_local_project_git_path("grep")
    previous_head = get_head_commit(repo_path)
    target_head = FullCommitHash("cb15dfa4b2d7fba0d50e87b49f979c7f996b8ebc")
    checkout_branch_or_commit(repo_path, target_head)
    try:
        self.assertEqual(get_submodule_head("grep", "grep", target_head),
                         target_head)
    finally:
        # Always restore the repository to its previous state.
        checkout_branch_or_commit(repo_path, previous_head)
def test_one_commit_diff(self):
    """Check if we get the correct code churn for a single commit."""
    churn = calc_commit_code_churn(
        get_local_project_git_path("brotli"),
        FullCommitHash("0c5603e07bed1d5fbb45e38f9bdf0e4560fde3f0"),
        ChurnConfig.create_c_style_languages_config())
    changed_files, added_lines, removed_lines = churn
    self.assertEqual(1, changed_files)
    self.assertEqual(2, added_lines)
    self.assertEqual(2, removed_lines)
def test_get_commits_before_timestamp(self) -> None:
    """Check if we can correctly determine the commits before a specific
    timestamp."""
    repo_path = get_local_project_git_path('brotli')
    commits_before = get_commits_before_timestamp('2013-10-24', repo_path)
    # newest found commit should be
    self.assertEqual("c66e4e3e4fc3ba36ca36a43eee3b704f7b989c60",
                     commits_before[0].hash)
    # oldest commit should be
    self.assertEqual("8f30907d0f2ef354c2b31bdee340c2b11dda0fb0",
                     commits_before[-1].hash)
def test_one_commit_diff_2(self):
    """Check if we get the correct code churn for a single commit."""
    churn = calc_commit_code_churn(
        get_local_project_git_path("brotli"),
        FullCommitHash("fc823290a76a260b7ba6f47ab5f52064a0ce19ff"),
        ChurnConfig.create_c_style_languages_config())
    changed_files, added_lines, removed_lines = churn
    self.assertEqual(1, changed_files)
    self.assertEqual(5, added_lines)
    self.assertEqual(0, removed_lines)
def test_one_commit_diff_3(self):
    """Check if we get the correct code churn for a single commit."""
    churn = calc_commit_code_churn(
        get_local_project_git_path("brotli"),
        FullCommitHash("924b2b2b9dc54005edbcd85a1b872330948cdd9e"),
        ChurnConfig.create_c_style_languages_config())
    changed_files, added_lines, removed_lines = churn
    self.assertEqual(3, changed_files)
    self.assertEqual(38, added_lines)
    self.assertEqual(7, removed_lines)
def test_get_commits_after_timestamp(self) -> None:
    """Check if we can correctly determine the commits after a specific
    timestamp."""
    repo_path = get_local_project_git_path('brotli')
    commits_after = get_commits_after_timestamp('2021-01-01', repo_path)
    # oldest found commit should be
    self.assertEqual("4969984a95534a508f93b38c74d150e86ef333f4",
                     commits_after[-1].hash)
    # second oldest commit should be
    self.assertEqual("0e8afdc968f3b7c891379e558b8dcaf42d93703b",
                     commits_after[-2].hash)
def test_commit_range(self):
    """Check if we get the correct code churn for commit range."""
    churn = calc_code_churn(
        get_local_project_git_path("brotli"),
        FullCommitHash("36ac0feaf9654855ee090b1f042363ecfb256f31"),
        FullCommitHash("924b2b2b9dc54005edbcd85a1b872330948cdd9e"),
        ChurnConfig.create_c_style_languages_config())
    changed_files, added_lines, removed_lines = churn
    self.assertEqual(3, changed_files)
    self.assertEqual(49, added_lines)
    self.assertEqual(11, removed_lines)
def build_revisions_churn_table(
        project_name: str, commit_map: CommitMap,
        revisions: tp.List[FullCommitHash]) -> pd.DataFrame:
    """
    Build a pandas data frame that contains all churn related data for the
    given list of revisions.

    The churn is calculated as the diff between two successive revisions in
    the ``revisions`` list.

    Table layout:
        "revision", "time_id", "insertions", "deletions", "changed_files"

    Args:
        project_name: name of the project
        commit_map: CommitMap for the given project(by project_name)
        revisions: list of revisions used to calculate the churn data

    Returns:
        a data frame containing the churn data
    """

    def _empty_layout() -> pd.DataFrame:
        # Empty frame that pins the column order and integer dtypes.
        layout = pd.DataFrame(columns=[
            "revision", "time_id", "insertions", "deletions", "changed_files"
        ])
        layout.time_id = layout.time_id.astype('int32')
        layout.insertions = layout.insertions.astype('int64')
        layout.deletions = layout.deletions.astype('int64')
        layout.changed_files = layout.changed_files.astype('int64')
        return layout

    repo_path = get_local_project_git_path(project_name)
    # The first revision has no predecessor in the list, so its churn is 0.
    churn_values: tp.List[tp.Tuple[int, int, int]] = [(0, 0, 0)]
    churn_values += [
        calc_code_churn(repo_path, predecessor, successor,
                        ChurnConfig.create_c_style_languages_config())
        for predecessor, successor in zip(revisions, revisions[1:])
    ]
    churn_frame = pd.DataFrame({
        "revision": revisions,
        "time_id": [commit_map.time_id(rev) for rev in revisions],
        "insertions": [churn[1] for churn in churn_values],
        "deletions": [churn[2] for churn in churn_values],
        "changed_files": [churn[0] for churn in churn_values]
    })
    return pd.concat([_empty_layout(), churn_frame])