def download_commits(repo_dir, rev_start=0, save=True, use_single_process=False):
    """Mine the commits of the Mercurial repository at ``repo_dir``.

    Args:
        repo_dir: Path of the local repository; opened with ``hglib.open``.
        rev_start: Forwarded to ``get_revs`` to select the revisions to mine.
        save: Forwarded to ``calculate_experiences``; when true, the resulting
            commit dicts are also appended to ``COMMITS_DB``.
        use_single_process: When true, log retrieval and transformation run
            in the current process instead of going through
            ``hg_log_multi`` / a ``ProcessPoolExecutor``.

    Returns:
        A list with the ``to_dict()`` form of every commit whose ``ignored``
        attribute is falsy, or an empty list when there are no revisions.
    """
    # Both names are (re)bound at module level further down.
    global rs_parsepatch
    global code_analysis_server

    with hglib.open(repo_dir) as hg:
        revs = get_revs(hg, rev_start)

    if len(revs) == 0:
        print("No commits to analyze")
        return []

    first_pushdate = get_first_pushdate(repo_dir)

    print(f"Mining {len(revs)} commits...")

    if not use_single_process:
        print(f"Using {os.cpu_count()} processes...")
        commits = hg_log_multi(repo_dir, revs)
    else:
        with hglib.open(repo_dir) as hg:
            commits = hg_log(hg, revs)

    print("Downloading file->component mapping...")
    download_component_mapping()

    set_commits_to_ignore(repo_dir, commits)

    commits_num = len(commits)

    print(f"Mining {commits_num} commits...")

    # Imported lazily and published as a module global, as in the original.
    import rs_parsepatch

    code_analysis_server = rust_code_analysis_server.RustCodeAnalysisServer()

    # The server must be terminated even if a transform fails; otherwise it
    # would be left running after an exception.
    try:
        if not use_single_process:
            with concurrent.futures.ProcessPoolExecutor(
                initializer=_init_process, initargs=(repo_dir,)
            ) as executor:
                commits = executor.map(_transform, commits, chunksize=64)
                commits = tqdm(commits, total=commits_num)
                # Consume the lazy map while the executor is still alive.
                commits = list(commits)
        else:
            with hglib.open(repo_dir) as hg:
                commits = [transform(hg, repo_dir, c) for c in commits]
    finally:
        code_analysis_server.terminate()

    calculate_experiences(commits, first_pushdate, save)

    commits = [commit.to_dict() for commit in commits if not commit.ignored]

    if save:
        db.append(COMMITS_DB, commits)

    return commits
def download_commits(
    repo_dir: str,
    rev_start: str = None,
    revs: List[bytes] = None,
    save: bool = True,
    use_single_process: bool = False,
    include_no_bug: bool = False,
    include_backouts: bool = False,
    include_ignored: bool = False,
) -> Tuple[CommitDict, ...]:
    """Mine commits from the Mercurial repository at ``repo_dir``.

    Either ``revs`` or ``rev_start`` must be given.

    Args:
        repo_dir: Path of the local repository; opened with ``hglib.open``.
        rev_start: Forwarded to ``get_revs`` when ``revs`` is not supplied.
        revs: Explicit revisions to mine; takes precedence over ``rev_start``.
        save: Forwarded to ``calculate_experiences``; when true, the component
            mapping is always (re)downloaded and the commit dicts are appended
            to ``COMMITS_DB``.
        use_single_process: When true, log retrieval and transformation run
            in the current process instead of going through
            ``hg_log_multi`` / a ``ProcessPoolExecutor``.
        include_no_bug: Forwarded to ``filter_commits``.
        include_backouts: Forwarded to ``filter_commits``.
        include_ignored: Forwarded to ``filter_commits``.

    Returns:
        A tuple of the commit dicts kept by ``filter_commits``, or an empty
        tuple when there are no revisions. Note that the *unfiltered* dicts
        are what gets appended to ``COMMITS_DB`` when ``save`` is true.

    Raises:
        AssertionError: If both ``revs`` and ``rev_start`` are ``None``.
    """
    global code_analysis_server

    assert (
        revs is not None or rev_start is not None
    ), "either revs or rev_start must be provided"

    with hglib.open(repo_dir) as hg:
        if revs is None:
            revs = get_revs(hg, rev_start)

        if len(revs) == 0:
            logger.info("No commits to analyze")
            return tuple()

        first_pushdate = get_first_pushdate(repo_dir)

        logger.info(f"Mining {len(revs)} commits...")

        if not use_single_process:
            logger.info(f"Using {os.cpu_count()} processes...")
            commits = hg_log_multi(repo_dir, revs)
        else:
            commits = hg_log(hg, revs)

        # Skip the download only for a non-saving run that already has a
        # local copy of the mapping.
        if save or not os.path.exists("data/component_mapping.lmdb"):
            logger.info("Downloading file->component mapping...")
            download_component_mapping()

        set_commits_to_ignore(hg, repo_dir, commits)

        commits_num = len(commits)

        logger.info(f"Mining {commits_num} patches...")

        code_analysis_server = rust_code_analysis_server.RustCodeAnalysisServer()

        # Terminate the server (and close the component mapping in the
        # single-process path) even when a transform raises, so that a
        # failure does not leave them open.
        try:
            if not use_single_process:
                with concurrent.futures.ProcessPoolExecutor(
                    initializer=_init_process, initargs=(repo_dir,)
                ) as executor:
                    commits = executor.map(_transform, commits, chunksize=64)
                    commits = tqdm(commits, total=commits_num)
                    # Consume the lazy map while the executor is still alive.
                    commits = tuple(commits)
            else:
                # NOTE(review): presumably opens the mapping that
                # close_component_mapping() releases -- confirm.
                get_component_mapping()
                try:
                    commits = tuple(transform(hg, repo_dir, c) for c in commits)
                finally:
                    close_component_mapping()
        finally:
            code_analysis_server.terminate()

    calculate_experiences(commits, first_pushdate, save)

    logger.info("Applying final commits filtering...")

    commits = tuple(commit.to_dict() for commit in commits)

    if save:
        db.append(COMMITS_DB, commits)

    return tuple(
        filter_commits(
            commits,
            include_no_bug=include_no_bug,
            include_backouts=include_backouts,
            include_ignored=include_ignored,
        )
    )
def test_get_touched_functions():
    """Check ``repository.get_touched_functions`` on small C++ and JS snippets.

    Each expected tuple is ``(function name, start line, end line)``, so the
    snippets are built one source line per list entry to make the line
    numbers the assertions rely on explicit. Indentation inside the snippets
    is immaterial to the assertions.
    """
    # Allow using the local code analysis server.
    responses.add_passthru("http://127.0.0.1")

    repository.code_analysis_server = rust_code_analysis_server.RustCodeAnalysisServer()

    cpp_two_functions = "\n".join(
        [
            "void func1() {",  # line 1
            "    int i = 1;",
            "}",  # line 3
            "",
            "void func2() {",  # line 5
            "    int i = 2;",
            "}",  # line 7
        ]
    )

    cpp_three_functions = "\n".join(
        [
            "void func1() {",  # line 1
            "    int i = 1;",
            "}",  # line 3
            "",
            "void func3() {",  # line 5
            "    int i = 3;",
            "}",  # line 7
            "",
            "void func4() {",  # line 9
            "    int i = 4;",
            "}",  # line 11
        ]
    )

    js_top_level_and_function = "\n".join(
        [
            "let j = 0;",  # line 1
            "",
            "function func() {",  # line 3
            "    let i = 0;",
            "}",  # line 5
        ]
    )

    js_anonymous_inner = "\n".join(
        [
            "function outer_func() {",  # line 1
            "    let i = 0;",
            "    let f = function() {",
            "        let j = 0;",
            "    }();",
            "}",  # line 6
        ]
    )

    js_named_inner = "\n".join(
        [
            "function outer_func() {",  # line 1
            "    let i = 0;",
            "    function inner_func() {",  # line 3
            "        let j = 0;",
            "    }",  # line 5
            "}",  # line 6
        ]
    )

    # No function touched.
    touched_functions = repository.get_touched_functions(
        "file.cpp", [], [], cpp_two_functions
    )
    assert touched_functions == set()

    # A function touched by adding a line.
    touched_functions = repository.get_touched_functions(
        "file.cpp", [], [1], cpp_two_functions
    )
    assert touched_functions == {("func1", 1, 3)}

    # A function touched by removing a line, another function touched by adding a line.
    touched_functions = repository.get_touched_functions(
        "file.cpp", [2, 5, 6, 7, 8], [6], cpp_three_functions
    )
    assert touched_functions == {("func3", 5, 7), ("func1", 1, 3)}

    # A function touched by replacing a line.
    touched_functions = repository.get_touched_functions(
        "file.cpp", [6], [6], cpp_two_functions
    )
    assert touched_functions == {("func2", 5, 7)}

    # top-level and a JavaScript function touched.
    touched_functions = repository.get_touched_functions(
        "file.js", [], [1, 4], js_top_level_and_function
    )
    assert touched_functions == {("func", 3, 5)}

    # An anonymous function touched inside another function.
    touched_functions = repository.get_touched_functions(
        "file.jsm", [], [4], js_anonymous_inner
    )
    assert touched_functions == {("outer_func", 1, 6)}

    # A function touched inside another function.
    touched_functions = repository.get_touched_functions(
        "file.jsm", [], [4], js_named_inner
    )
    assert touched_functions == {("outer_func", 1, 6), ("inner_func", 3, 5)}